summary refs log tree commit diff
diff options
context:
space:
mode:
author Gregory Wlodarek <gregory.wlodarek@mongodb.com> 2019-04-29 20:18:42 -0400
committer Gregory Wlodarek <gregory.wlodarek@mongodb.com> 2019-04-29 20:25:09 -0400
commit abf37f69a45009f44f275e03e2f07d2d496d5b8d (patch)
tree 5f58d5c227e57b44341b1be900b0feb1dc0fb3de
parent 7ea05d8684052198c595dee0b9a9cabf652e904d (diff)
download mongo-abf37f69a45009f44f275e03e2f07d2d496d5b8d.tar.gz
SERVER-30356 Improve error reporting for validation
-rw-r--r-- jstests/core/geo_s2sparse.js 4
-rw-r--r-- jstests/core/index_partial_create_drop.js 2
-rw-r--r-- jstests/core/index_partial_write_ops.js 2
-rw-r--r-- src/mongo/db/catalog/collection_impl.cpp 85
-rw-r--r-- src/mongo/db/catalog/index_consistency.cpp 273
-rw-r--r-- src/mongo/db/catalog/index_consistency.h 119
-rw-r--r-- src/mongo/db/catalog/private/record_store_validate_adaptor.cpp 34
-rw-r--r-- src/mongo/db/commands/validate.cpp 2
-rw-r--r-- src/mongo/db/storage/record_store.h 2
9 files changed, 403 insertions, 120 deletions
diff --git a/jstests/core/geo_s2sparse.js b/jstests/core/geo_s2sparse.js
index 3677e98f84c..57f4f73fa3a 100644
--- a/jstests/core/geo_s2sparse.js
+++ b/jstests/core/geo_s2sparse.js
@@ -7,7 +7,7 @@
var coll = db.geo_s2sparse;
var point = {type: "Point", coordinates: [5, 5]};
var indexSpec = {geo: "2dsphere", nonGeo: 1};
- var indexName = 'test.geo_s2sparse.$geo_2dsphere_nonGeo_1';
+ var indexName = 'geo_2dsphere_nonGeo_1';
//
// V2 indices are "geo sparse" always.
@@ -103,7 +103,7 @@
coll.drop();
coll.ensureIndex({geo: "2dsphere", otherGeo: "2dsphere"});
- indexName = 'test.geo_s2sparse.$geo_2dsphere_otherGeo_2dsphere';
+ indexName = 'geo_2dsphere_otherGeo_2dsphere';
// Insert N documents with the first geo field.
bulkInsertDocs(coll, N, function(i) {
diff --git a/jstests/core/index_partial_create_drop.js b/jstests/core/index_partial_create_drop.js
index bf25fa3a6b1..55a6b06d117 100644
--- a/jstests/core/index_partial_create_drop.js
+++ b/jstests/core/index_partial_create_drop.js
@@ -23,7 +23,7 @@
} else {
kpi = res.keysPerIndex;
}
- return kpi[coll.getFullName() + ".$" + idxName];
+ return kpi[idxName];
};
coll.drop();
diff --git a/jstests/core/index_partial_write_ops.js b/jstests/core/index_partial_write_ops.js
index a88989883d1..730bcca5318 100644
--- a/jstests/core/index_partial_write_ops.js
+++ b/jstests/core/index_partial_write_ops.js
@@ -15,7 +15,7 @@
} else {
kpi = res.keysPerIndex;
}
- return kpi[coll.getFullName() + ".$" + idxName];
+ return kpi[idxName];
};
coll.drop();
diff --git a/src/mongo/db/catalog/collection_impl.cpp b/src/mongo/db/catalog/collection_impl.cpp
index 98b11660c74..57f1f5fa173 100644
--- a/src/mongo/db/catalog/collection_impl.cpp
+++ b/src/mongo/db/catalog/collection_impl.cpp
@@ -184,7 +184,6 @@ StatusWith<CollectionImpl::ValidationAction> _parseValidationAction(StringData n
} // namespace
-using std::endl;
using std::string;
using std::unique_ptr;
using std::vector;
@@ -412,8 +411,8 @@ Status CollectionImpl::insertDocuments(OperationContext* opCtx,
string whenFirst =
firstIdElem ? (string(" when first _id is ") + firstIdElem.str()) : "";
while (MONGO_FAIL_POINT(hangAfterCollectionInserts)) {
- log() << "hangAfterCollectionInserts fail point enabled for " << _ns.toString()
- << whenFirst << ". Blocking until fail point is disabled.";
+ log() << "hangAfterCollectionInserts fail point enabled for " << _ns << whenFirst
+ << ". Blocking until fail point is disabled.";
mongo::sleepsecs(1);
opCtx->checkForInterrupt();
}
@@ -1095,8 +1094,9 @@ void _validateIndexes(OperationContext* opCtx,
const IndexDescriptor* descriptor = entry->descriptor();
const IndexAccessMethod* iam = entry->accessMethod();
- log(LogComponent::kIndex) << "validating index " << descriptor->indexNamespace() << endl;
- ValidateResults& curIndexResults = (*indexNsResultsMap)[descriptor->indexNamespace()];
+ log(LogComponent::kIndex) << "validating index " << descriptor->indexName()
+ << " on collection " << descriptor->parentNS();
+ ValidateResults& curIndexResults = (*indexNsResultsMap)[descriptor->indexName()];
bool checkCounts = false;
int64_t numTraversedKeys;
int64_t numValidatedKeys;
@@ -1120,7 +1120,7 @@ void _validateIndexes(OperationContext* opCtx,
}
if (curIndexResults.valid) {
- keysPerIndex->appendNumber(descriptor->indexNamespace(),
+ keysPerIndex->appendNumber(descriptor->indexName(),
static_cast<long long>(numTraversedKeys));
} else {
results->valid = false;
@@ -1131,19 +1131,54 @@ void _validateIndexes(OperationContext* opCtx,
}
}
-void _markIndexEntriesInvalid(ValidateResultsMap* indexNsResultsMap, ValidateResults* results) {
+/**
+ * Executes the second phase of validation for improved error reporting. This is only done if
+ * any index inconsistencies are found during the first phase of validation.
+ */
+void _gatherIndexEntryErrors(OperationContext* opCtx,
+ RecordStore* recordStore,
+ IndexCatalog* indexCatalog,
+ IndexConsistency* indexConsistency,
+ RecordStoreValidateAdaptor* indexValidator,
+ ValidateResultsMap* indexNsResultsMap,
+ ValidateResults* result) {
+ indexConsistency->setSecondPhase();
+
+ log(LogComponent::kIndex) << "Starting to traverse through all the document key sets.";
+
+ // During the second phase of validation, iterate through each document's key set and only record
+ // the keys that were inconsistent during the first phase of validation.
+ std::unique_ptr<SeekableRecordCursor> cursor = recordStore->getCursor(opCtx, true);
+ while (auto record = cursor->next()) {
+ opCtx->checkForInterrupt();
- // The error message can't be more specific because even though the index is
- // invalid, we won't know if the corruption occurred on the index entry or in
- // the document.
- for (auto& it : *indexNsResultsMap) {
- // Marking all indexes as invalid since we don't know which one failed.
- ValidateResults& r = it.second;
- r.valid = false;
+ // We can ignore the status of validate as it was already checked during the first phase.
+ size_t validatedSize;
+ indexValidator->validate(record->id, record->data, &validatedSize).ignore();
}
- string msg = "one or more indexes contain invalid index entries.";
- results->errors.push_back(msg);
- results->valid = false;
+
+ log(LogComponent::kIndex) << "Finished traversing through all the document key sets.";
+ log(LogComponent::kIndex) << "Starting to traverse through all the indexes.";
+
+ // Iterate through all the indexes in the collection and only record the index entry keys that
+ // had inconsistencies during the first phase.
+ std::unique_ptr<IndexCatalog::IndexIterator> it = indexCatalog->getIndexIterator(opCtx, false);
+ while (it->more()) {
+ opCtx->checkForInterrupt();
+
+ const IndexCatalogEntry* entry = it->next();
+ const IndexDescriptor* descriptor = entry->descriptor();
+ const IndexAccessMethod* iam = entry->accessMethod();
+
+ log(LogComponent::kIndex) << "Traversing through the index entries for index "
+ << descriptor->indexName() << ".";
+ indexValidator->traverseIndex(
+ iam, descriptor, /*ValidateResults=*/nullptr, /*numTraversedKeys=*/nullptr);
+ }
+
+ log(LogComponent::kIndex) << "Finished traversing through all the indexes.";
+
+ indexConsistency->addIndexEntryErrors(indexNsResultsMap, result);
}
void _validateIndexKeyCount(OperationContext* opCtx,
@@ -1156,7 +1191,7 @@ void _validateIndexKeyCount(OperationContext* opCtx,
indexCatalog->getIndexIterator(opCtx, false);
while (indexIterator->more()) {
const IndexDescriptor* descriptor = indexIterator->next()->descriptor();
- ValidateResults& curIndexResults = (*indexNsResultsMap)[descriptor->indexNamespace()];
+ ValidateResults& curIndexResults = (*indexNsResultsMap)[descriptor->indexName()];
if (curIndexResults.valid) {
indexValidator->validateIndexKeyCount(
@@ -1277,8 +1312,7 @@ Status CollectionImpl::validate(OperationContext* opCtx,
// Validate the record store
std::string uuidString = str::stream()
<< " (UUID: " << (uuid() ? uuid()->toString() : "none") << ")";
- log(LogComponent::kIndex) << "validating collection " << ns().toString() << uuidString
- << endl;
+ log(LogComponent::kIndex) << "validating collection " << ns() << uuidString;
_validateRecordStore(
opCtx, _recordStore, level, background, &indexValidator, results, output);
@@ -1296,7 +1330,16 @@ Status CollectionImpl::validate(OperationContext* opCtx,
results);
if (indexConsistency.haveEntryMismatch()) {
- _markIndexEntriesInvalid(&indexNsResultsMap, results);
+ log(LogComponent::kIndex)
+ << "Index inconsistencies were detected on collection " << ns()
+ << ". Starting the second phase of index validation to gather concise errors.";
+ _gatherIndexEntryErrors(opCtx,
+ _recordStore,
+ _indexCatalog.get(),
+ &indexConsistency,
+ &indexValidator,
+ &indexNsResultsMap,
+ results);
}
}
diff --git a/src/mongo/db/catalog/index_consistency.cpp b/src/mongo/db/catalog/index_consistency.cpp
index a10e63dee3a..1ff0f14d65a 100644
--- a/src/mongo/db/catalog/index_consistency.cpp
+++ b/src/mongo/db/catalog/index_consistency.cpp
@@ -41,6 +41,7 @@
#include "mongo/db/index_names.h"
#include "mongo/db/server_options.h"
#include "mongo/db/storage/key_string.h"
+#include "mongo/db/storage/record_store.h"
#include "mongo/db/storage/sorted_data_interface.h"
#include "mongo/util/elapsed_tracker.h"
@@ -68,7 +69,8 @@ IndexConsistency::IndexConsistency(OperationContext* opCtx,
_recordStore(recordStore),
_tracker(opCtx->getServiceContext()->getFastClockSource(),
internalQueryExecYieldIterations.load(),
- Milliseconds(internalQueryExecYieldPeriodMS.load())) {
+ Milliseconds(internalQueryExecYieldPeriodMS.load())),
+ _firstPhase(true) {
IndexCatalog* indexCatalog = _collection->getIndexCatalog();
std::unique_ptr<IndexCatalog::IndexIterator> indexIterator =
@@ -78,18 +80,18 @@ IndexConsistency::IndexConsistency(OperationContext* opCtx,
while (indexIterator->more()) {
const IndexDescriptor* descriptor = indexIterator->next()->descriptor();
- std::string indexNs = descriptor->indexNamespace();
-
- _indexNumber[descriptor->indexNamespace()] = indexNumber;
+ std::string indexName = descriptor->indexName();
+ _indexNumber[indexName] = indexNumber;
IndexInfo indexInfo;
- indexInfo.isReady =
- _collection->getCatalogEntry()->isIndexReady(opCtx, descriptor->indexName());
+ indexInfo.indexName = indexName;
+ indexInfo.keyPattern = descriptor->keyPattern();
+ indexInfo.isReady = _collection->getCatalogEntry()->isIndexReady(opCtx, indexName);
- uint32_t indexNsHash;
- MurmurHash3_x86_32(indexNs.c_str(), indexNs.size(), 0, &indexNsHash);
- indexInfo.indexNsHash = indexNsHash;
+ uint32_t indexNameHash;
+ MurmurHash3_x86_32(indexName.c_str(), indexName.size(), 0, &indexNameHash);
+ indexInfo.indexNameHash = indexNameHash;
indexInfo.indexScanFinished = false;
indexInfo.numKeys = 0;
@@ -103,24 +105,30 @@ IndexConsistency::IndexConsistency(OperationContext* opCtx,
}
}
-void IndexConsistency::addDocKey(const KeyString& ks, int indexNumber) {
+void IndexConsistency::addDocKey(const KeyString& ks,
+ int indexNumber,
+ const RecordId& recordId,
+ const BSONObj& indexKey) {
if (indexNumber < 0 || indexNumber >= static_cast<int>(_indexesInfo.size())) {
return;
}
stdx::lock_guard<stdx::mutex> lock(_classMutex);
- _addDocKey_inlock(ks, indexNumber);
+ _addDocKey_inlock(ks, indexNumber, recordId, indexKey);
}
-void IndexConsistency::addIndexKey(const KeyString& ks, int indexNumber) {
+void IndexConsistency::addIndexKey(const KeyString& ks,
+ int indexNumber,
+ const RecordId& recordId,
+ const BSONObj& indexKey) {
if (indexNumber < 0 || indexNumber >= static_cast<int>(_indexesInfo.size())) {
return;
}
stdx::lock_guard<stdx::mutex> lock(_classMutex);
- _addIndexKey_inlock(ks, indexNumber);
+ _addIndexKey_inlock(ks, indexNumber, recordId, indexKey);
}
void IndexConsistency::addMultikeyMetadataPath(const KeyString& ks, int indexNumber) {
@@ -216,33 +224,123 @@ int64_t IndexConsistency::getNumExtraIndexKeys(int indexNumber) const {
return _indexesInfo.at(indexNumber).numExtraIndexKeys;
}
-void IndexConsistency::nextStage() {
+int IndexConsistency::getIndexNumber(const std::string& indexName) {
- stdx::lock_guard<stdx::mutex> lock(_classMutex);
- if (_stage == ValidationStage::DOCUMENT) {
- _stage = ValidationStage::INDEX;
- } else if (_stage == ValidationStage::INDEX) {
- _stage = ValidationStage::NONE;
+ auto search = _indexNumber.find(indexName);
+ if (search != _indexNumber.end()) {
+ return search->second;
}
+
+ return -1;
}
-ValidationStage IndexConsistency::getStage() const {
+void IndexConsistency::setSecondPhase() {
stdx::lock_guard<stdx::mutex> lock(_classMutex);
- return _stage;
+ invariant(_firstPhase);
+ _firstPhase = false;
}
-int IndexConsistency::getIndexNumber(const std::string& indexNs) {
+void IndexConsistency::addIndexEntryErrors(ValidateResultsMap* indexNsResultsMap,
+ ValidateResults* results) {
+ stdx::lock_guard<stdx::mutex> lock(_classMutex);
+ invariant(!_firstPhase);
- auto search = _indexNumber.find(indexNs);
- if (search != _indexNumber.end()) {
- return search->second;
+ // We'll report up to 1MB for extra index entry errors and missing index entry errors.
+ const int kErrorSizeMB = 1 * 1024 * 1024;
+ int numMissingIndexEntriesSizeMB = 0;
+ int numExtraIndexEntriesSizeMB = 0;
+
+ int numMissingIndexEntryErrors = _missingIndexEntries.size();
+ int numExtraIndexEntryErrors = 0;
+ for (const auto& item : _extraIndexEntries) {
+ numExtraIndexEntryErrors += item.second.size();
}
- return -1;
+ // Inform which indexes have inconsistencies and add the BSON objects of the inconsistent index
+ // entries to the results vector.
+ bool missingIndexEntrySizeLimitWarning = false;
+ for (const auto& missingIndexEntry : _missingIndexEntries) {
+ const BSONObj& entry = missingIndexEntry.second;
+
+ // Only count the indexKey and idKey fields towards the total size.
+ numMissingIndexEntriesSizeMB += entry["indexKey"].size();
+ if (entry.hasField("idKey")) {
+ numMissingIndexEntriesSizeMB += entry["idKey"].size();
+ }
+
+ if (numMissingIndexEntriesSizeMB <= kErrorSizeMB) {
+ results->missingIndexEntries.push_back(entry);
+ } else if (!missingIndexEntrySizeLimitWarning) {
+ StringBuilder ss;
+ ss << "Not all missing index entry inconsistencies are listed due to size limitations.";
+ results->errors.push_back(ss.str());
+
+ missingIndexEntrySizeLimitWarning = true;
+ }
+
+ std::string indexName = entry["indexName"].String();
+ if (!indexNsResultsMap->at(indexName).valid) {
+ continue;
+ }
+
+ StringBuilder ss;
+ ss << "Index with name '" << indexName << "' has inconsistencies.";
+ results->errors.push_back(ss.str());
+
+ indexNsResultsMap->at(indexName).valid = false;
+ }
+
+ bool extraIndexEntrySizeLimitWarning = false;
+ for (const auto& extraIndexEntry : _extraIndexEntries) {
+ const SimpleBSONObjSet& entries = extraIndexEntry.second;
+ for (const auto& entry : entries) {
+ // Only count the indexKey field towards the total size.
+ numExtraIndexEntriesSizeMB += entry["indexKey"].size();
+ if (numExtraIndexEntriesSizeMB <= kErrorSizeMB) {
+ results->extraIndexEntries.push_back(entry);
+ } else if (!extraIndexEntrySizeLimitWarning) {
+ StringBuilder ss;
+ ss << "Not all extra index entry inconsistencies are listed due to size "
+ "limitations.";
+ results->errors.push_back(ss.str());
+
+ extraIndexEntrySizeLimitWarning = true;
+ }
+
+ std::string indexName = entry["indexName"].String();
+ if (!indexNsResultsMap->at(indexName).valid) {
+ continue;
+ }
+
+ StringBuilder ss;
+ ss << "Index with name '" << indexName << "' has inconsistencies.";
+ results->errors.push_back(ss.str());
+
+ indexNsResultsMap->at(indexName).valid = false;
+ }
+ }
+
+ // Inform how many inconsistencies were detected.
+ if (numMissingIndexEntryErrors > 0) {
+ StringBuilder ss;
+ ss << "Detected " << numMissingIndexEntryErrors << " missing index entries.";
+ results->warnings.push_back(ss.str());
+ }
+
+ if (numExtraIndexEntryErrors > 0) {
+ StringBuilder ss;
+ ss << "Detected " << numExtraIndexEntryErrors << " extra index entries.";
+ results->warnings.push_back(ss.str());
+ }
+
+ results->valid = false;
}
-void IndexConsistency::_addDocKey_inlock(const KeyString& ks, int indexNumber) {
+void IndexConsistency::_addDocKey_inlock(const KeyString& ks,
+ int indexNumber,
+ const RecordId& recordId,
+ const BSONObj& indexKey) {
// Ignore indexes that weren't ready before we started validation.
if (!_indexesInfo.at(indexNumber).isReady) {
@@ -250,11 +348,46 @@ void IndexConsistency::_addDocKey_inlock(const KeyString& ks, int indexNumber) {
}
const uint32_t hash = _hashKeyString(ks, indexNumber);
- _indexKeyCount[hash]++;
- _indexesInfo.at(indexNumber).numRecords++;
+
+ if (_firstPhase) {
+ // During the first phase of validation we only keep track of the count for the document
+ // keys encountered.
+ _indexKeyCount[hash]++;
+ _indexesInfo.at(indexNumber).numRecords++;
+ } else {
+ // For the second phase of validation, we keep track of the document keys that mapped to
+ // an inconsistent hash bucket during the first phase.
+ auto searchBuckets = _indexKeyCount.find(hash);
+ invariant(searchBuckets != _indexKeyCount.end());
+ if (searchBuckets->second == 0) {
+ // No inconsistencies in this hash bucket during the first phase.
+ return;
+ }
+
+ // Get the document's _id index key.
+ auto cursor = _recordStore->getCursor(_opCtx);
+ auto record = cursor->seekExact(recordId);
+ invariant(record);
+
+ BSONObj data = record->data.toBson();
+ boost::optional<BSONElement> idKey = boost::none;
+ if (data.hasField("_id")) {
+ idKey = data["_id"];
+ }
+
+ std::string key = std::string(ks.getBuffer(), ks.getSize());
+ BSONObj info = _generateInfo(indexNumber, recordId, indexKey, idKey);
+
+ // Cannot have duplicate KeyStrings during the document scan phase.
+ invariant(_missingIndexEntries.count(key) == 0);
+ _missingIndexEntries.insert(std::make_pair(key, info));
+ }
}
-void IndexConsistency::_addIndexKey_inlock(const KeyString& ks, int indexNumber) {
+void IndexConsistency::_addIndexKey_inlock(const KeyString& ks,
+ int indexNumber,
+ const RecordId& recordId,
+ const BSONObj& indexKey) {
// Ignore indexes that weren't ready before we started validation.
if (!_indexesInfo.at(indexNumber).isReady) {
@@ -262,16 +395,84 @@ void IndexConsistency::_addIndexKey_inlock(const KeyString& ks, int indexNumber)
}
const uint32_t hash = _hashKeyString(ks, indexNumber);
- _indexKeyCount[hash]--;
- _indexesInfo.at(indexNumber).numKeys++;
+
+ if (_firstPhase) {
+ // During the first phase of validation we only keep track of the count for the index entry
+ // keys encountered.
+ _indexKeyCount[hash]--;
+ _indexesInfo.at(indexNumber).numKeys++;
+ } else {
+ // For the second phase of validation, on the buckets that were inconsistent during the
+ // first phase, we see if there was a corresponding document key for the index entry key
+ // we have.
+ // If there is a corresponding document key for the index entry key, we remove the key from
+ // the '_missingIndexEntries' map. However if there was no document key for the index entry
+ // key, we add the key to the '_extraIndexEntries' map.
+ auto searchBuckets = _indexKeyCount.find(hash);
+ invariant(searchBuckets != _indexKeyCount.end());
+ if (searchBuckets->second == 0) {
+ // No inconsistencies in this hash bucket during the first phase.
+ return;
+ }
+
+ std::string key = std::string(ks.getBuffer(), ks.getSize());
+ BSONObj info = _generateInfo(indexNumber, recordId, indexKey, boost::none);
+
+ if (_missingIndexEntries.count(key) == 0) {
+ // We may have multiple extra index entries for a given KeyString.
+ auto search = _extraIndexEntries.find(key);
+ if (search == _extraIndexEntries.end()) {
+ SimpleBSONObjSet infoSet = {info};
+ _extraIndexEntries.insert(std::make_pair(key, infoSet));
+ return;
+ }
+
+ search->second.insert(info);
+ } else {
+ _missingIndexEntries.erase(key);
+ }
+ }
+}
+
+BSONObj IndexConsistency::_generateInfo(const int& indexNumber,
+ const RecordId& recordId,
+ const BSONObj& indexKey,
+ boost::optional<BSONElement> idKey) {
+ const std::string& indexName = _indexesInfo.at(indexNumber).indexName;
+ const BSONObj& keyPattern = _indexesInfo.at(indexNumber).keyPattern;
+
+ // We need to rehydrate the indexKey for improved readability.
+ // {"": ObjectId(...)} -> {"_id": ObjectId(...)}
+ auto keysIt = keyPattern.begin();
+ auto valuesIt = indexKey.begin();
+
+ BSONObjBuilder b;
+ while (keysIt != keyPattern.end()) {
+ // keysIt and valuesIt must have the same number of elements.
+ invariant(valuesIt != indexKey.end());
+ b.appendAs(*valuesIt, keysIt->fieldName());
+ keysIt++;
+ valuesIt++;
+ }
+
+ BSONObj rehydratedKey = b.done();
+
+ if (idKey) {
+ return BSON("indexName" << indexName << "recordId" << recordId.repr() << "idKey" << *idKey
+ << "indexKey"
+ << rehydratedKey);
+ } else {
+ return BSON("indexName" << indexName << "recordId" << recordId.repr() << "indexKey"
+ << rehydratedKey);
+ }
}
uint32_t IndexConsistency::_hashKeyString(const KeyString& ks, int indexNumber) const {
- uint32_t indexNsHash = _indexesInfo.at(indexNumber).indexNsHash;
+ uint32_t indexNameHash = _indexesInfo.at(indexNumber).indexNameHash;
MurmurHash3_x86_32(
- ks.getTypeBits().getBuffer(), ks.getTypeBits().getSize(), indexNsHash, &indexNsHash);
- MurmurHash3_x86_32(ks.getBuffer(), ks.getSize(), indexNsHash, &indexNsHash);
- return indexNsHash % (1U << 22);
+ ks.getTypeBits().getBuffer(), ks.getTypeBits().getSize(), indexNameHash, &indexNameHash);
+ MurmurHash3_x86_32(ks.getBuffer(), ks.getSize(), indexNameHash, &indexNameHash);
+ return indexNameHash % (1U << 22);
}
} // namespace mongo
diff --git a/src/mongo/db/catalog/index_consistency.h b/src/mongo/db/catalog/index_consistency.h
index d22696c5b0c..348f282e0af 100644
--- a/src/mongo/db/catalog/index_consistency.h
+++ b/src/mongo/db/catalog/index_consistency.h
@@ -39,20 +39,6 @@
namespace mongo {
/**
- * The ValidationStage allows the IndexConsistency class to perform
- * the correct operations that depend on where we are in the validation.
- */
-enum class ValidationStage { DOCUMENT, INDEX, NONE };
-
-/**
- * The ValidationOperation is used by classes using the IndexObserver to let us know what operation
- * was associated with it.
- * The `UPDATE` operation can be seen as two independent operations (`REMOVE` operation followed
- * by an `INSERT` operation).
- */
-enum class ValidationOperation { INSERT, REMOVE };
-
-/**
* The IndexConsistency class is used to keep track of the index consistency.
* It does this by using the index keys from index entries and index keys generated from the
* document to ensure there is a one-to-one mapping for each key.
@@ -64,10 +50,14 @@ enum class ValidationOperation { INSERT, REMOVE };
* Contains all the index information and stats throughout the validation.
*/
struct IndexInfo {
+ // The name of the index.
+ std::string indexName;
+ // The index key pattern.
+ BSONObj keyPattern;
// Informs us if the index was ready or not for consumption during the start of validation.
bool isReady;
- // Contains the pre-computed hashed of the index namespace.
- uint32_t indexNsHash;
+ // Contains the pre-computed hash of the index name.
+ uint32_t indexNameHash;
// True if the index has finished scanning from the index scan stage, otherwise false.
bool indexScanFinished;
// The number of index entries belonging to the index.
@@ -85,6 +75,8 @@ struct IndexInfo {
};
class IndexConsistency final {
+ using ValidateResultsMap = std::map<std::string, ValidateResults>;
+
public:
IndexConsistency(OperationContext* opCtx,
Collection* collection,
@@ -95,8 +87,14 @@ public:
/**
* Helper functions for `_addDocKey` and `_addIndexKey` for concurrency control.
*/
- void addDocKey(const KeyString& ks, int indexNumber);
- void addIndexKey(const KeyString& ks, int indexNumber);
+ void addDocKey(const KeyString& ks,
+ int indexNumber,
+ const RecordId& recordId,
+ const BSONObj& indexKey);
+ void addIndexKey(const KeyString& ks,
+ int indexNumber,
+ const RecordId& recordId,
+ const BSONObj& indexKey);
/**
* To validate $** multikey metadata paths, we first scan the collection and add a hash of all
@@ -146,30 +144,34 @@ public:
int64_t getNumExtraIndexKeys(int indexNumber) const;
/**
- * Moves the `_stage` variable to the next corresponding stage in the following order:
- * `DOCUMENT` -> `INDEX`
- * `INDEX` -> `NONE`
- * `NONE` -> `NONE`
+ * Returns the index number for the corresponding index name.
*/
- void nextStage();
+ int getIndexNumber(const std::string& indexName);
/**
- * Returns the `_stage` that the validation is on.
+ * Informs the IndexConsistency object that we're advancing to the second phase of index
+ * validation.
*/
- ValidationStage getStage() const;
+ void setSecondPhase();
/**
- * Returns the index number for the corresponding index namespace's.
+ * Records the errors gathered from the second phase of index validation into the provided
+ * ValidateResultsMap and ValidateResults.
*/
- int getIndexNumber(const std::string& indexNs);
+ void addIndexEntryErrors(ValidateResultsMap* indexNsResultsMap, ValidateResults* results);
private:
+ IndexConsistency() = delete;
+
OperationContext* _opCtx;
Collection* _collection;
const NamespaceString _nss;
const RecordStore* _recordStore;
ElapsedTracker _tracker;
+ // Protects the variables below.
+ mutable stdx::mutex _classMutex;
+
// We map the hashed KeyString values to a bucket which contain the count of how many
// index keys and document keys we've seen in each bucket.
// Count rules:
@@ -179,7 +181,7 @@ private:
// are too few index entries.
// - If the count is < 0 in the bucket at the end of the validation pass, then there
// are too many index entries.
- std::map<uint32_t, uint32_t> _indexKeyCount;
+ std::map<uint32_t, int32_t> _indexKeyCount;
// Contains the corresponding index number for each index name
std::map<std::string, int> _indexNumber;
@@ -187,32 +189,59 @@ private:
// A mapping of index numbers to IndexInfo
std::map<int, IndexInfo> _indexesInfo;
- // The current index namespace being scanned in the index scan phase.
- int _currentIndex = -1;
-
- // The stage that the validation is currently on.
- ValidationStage _stage = ValidationStage::DOCUMENT;
+ // Whether we're in the first or second phase of index validation.
+ bool _firstPhase;
- // Threshold for the number of errors to record before returning "There are too many errors".
- static const int _kErrorThreshold = 100;
+ // Populated during the second phase of validation, this map contains the index entries that
+ // were pointing at an invalid document key.
+ // The map contains a KeyString pointing at a set of BSON objects as there may be multiple
+ // extra index entries for the same KeyString.
+ std::map<std::string, SimpleBSONObjSet> _extraIndexEntries;
- // The current number of errors that are recorded.
- int _numErrorsRecorded = 0;
+ // Populated during the second phase of validation, this map contains the index entries that
+ // were missing while the document key was in place.
+ // The map contains a KeyString pointing to a BSON object as there can only be one missing index
+ // entry for a given KeyString.
+ std::map<std::string, BSONObj> _missingIndexEntries;
- // Only one thread can use the class at a time
- mutable stdx::mutex _classMutex;
+ /**
+ * During the first phase of validation, given the document's key KeyString, increment the
+ * corresponding `_indexKeyCount` by hashing it.
+ * For the second phase of validation, keep track of the document keys that hashed to
+ * inconsistent hash buckets during the first phase of validation.
+ */
+ void _addDocKey_inlock(const KeyString& ks,
+ int indexNumber,
+ const RecordId& recordId,
+ const BSONObj& indexKey);
/**
- * Given the document's key KeyString, increment the corresponding `_indexKeyCount`
- * by hashing it.
+ * During the first phase of validation, given the index entry's KeyString, decrement the
+ * corresponding `_indexKeyCount` by hashing it.
+ * For the second phase of validation, try to match the index entry keys that hashed to
+ * inconsistent hash buckets during the first phase of validation to document keys.
*/
- void _addDocKey_inlock(const KeyString& ks, int indexNumber);
+ void _addIndexKey_inlock(const KeyString& ks,
+ int indexNumber,
+ const RecordId& recordId,
+ const BSONObj& indexKey);
/**
- * Given the index entry's KeyString, decrement the corresponding `_indexKeyCount`
- * by hashing it.
+ * Generates a key for the second phase of validation. The key's format is the following:
+ * {
+ * indexName: <string>,
+ * recordId: <number>,
+ * idKey: <object>, // Only available for missing index entries.
+ * indexKey: {
+ * <key>: <value>,
+ * ...
+ * }
+ * }
*/
- void _addIndexKey_inlock(const KeyString& ks, int indexNumber);
+ BSONObj _generateInfo(const int& indexNumber,
+ const RecordId& recordId,
+ const BSONObj& indexKey,
+ boost::optional<BSONElement> idKey);
/**
* Returns a hashed value from the given KeyString and index name.
diff --git a/src/mongo/db/catalog/private/record_store_validate_adaptor.cpp b/src/mongo/db/catalog/private/record_store_validate_adaptor.cpp
index 3151fca3268..bb19f9fd11a 100644
--- a/src/mongo/db/catalog/private/record_store_validate_adaptor.cpp
+++ b/src/mongo/db/catalog/private/record_store_validate_adaptor.cpp
@@ -89,8 +89,8 @@ Status RecordStoreValidateAdaptor::validate(const RecordId& recordId,
while (it->more()) {
const IndexDescriptor* descriptor = it->next()->descriptor();
- const std::string indexNs = descriptor->indexNamespace();
- int indexNumber = _indexConsistency->getIndexNumber(indexNs);
+ const std::string indexName = descriptor->indexName();
+ int indexNumber = _indexConsistency->getIndexNumber(indexName);
ValidateResults curRecordResults;
const IndexAccessMethod* iam = _indexCatalog->getEntry(descriptor)->accessMethod();
@@ -98,7 +98,7 @@ Status RecordStoreValidateAdaptor::validate(const RecordId& recordId,
if (descriptor->isPartial()) {
const IndexCatalogEntry* ice = _indexCatalog->getEntry(descriptor);
if (!ice->getFilterExpression()->matchesBSON(recordBson)) {
- (*_indexNsResultsMap)[indexNs] = curRecordResults;
+ (*_indexNsResultsMap)[indexName] = curRecordResults;
continue;
}
}
@@ -140,9 +140,9 @@ Status RecordStoreValidateAdaptor::validate(const RecordId& recordId,
// We want to use the latest version of KeyString here.
KeyString ks(KeyString::kLatestVersion, key, ord, recordId);
- _indexConsistency->addDocKey(ks, indexNumber);
+ _indexConsistency->addDocKey(ks, indexNumber, recordId, key);
}
- (*_indexNsResultsMap)[indexNs] = curRecordResults;
+ (*_indexNsResultsMap)[indexName] = curRecordResults;
}
return status;
}
@@ -151,8 +151,8 @@ void RecordStoreValidateAdaptor::traverseIndex(const IndexAccessMethod* iam,
const IndexDescriptor* descriptor,
ValidateResults* results,
int64_t* numTraversedKeys) {
- auto indexNs = descriptor->indexNamespace();
- int indexNumber = _indexConsistency->getIndexNumber(indexNs);
+ auto indexName = descriptor->indexName();
+ int indexNumber = _indexConsistency->getIndexNumber(indexName);
int64_t numKeys = 0;
const auto& key = descriptor->keyPattern();
@@ -170,12 +170,15 @@ void RecordStoreValidateAdaptor::traverseIndex(const IndexAccessMethod* iam,
stdx::make_unique<KeyString>(version, indexEntry->key, ord, indexEntry->loc);
// Ensure that the index entries are in increasing or decreasing order.
if (!isFirstEntry && *indexKeyString < *prevIndexKeyString) {
- if (results->valid) {
+ if (results && results->valid) {
results->errors.push_back(
"one or more indexes are not in strictly ascending or descending "
"order");
}
- results->valid = false;
+
+ if (results) {
+ results->valid = false;
+ }
}
const RecordId kWildcardMultikeyMetadataRecordId{
@@ -188,21 +191,24 @@ void RecordStoreValidateAdaptor::traverseIndex(const IndexAccessMethod* iam,
continue;
}
- _indexConsistency->addIndexKey(*indexKeyString, indexNumber);
+ _indexConsistency->addIndexKey(
+ *indexKeyString, indexNumber, indexEntry->loc, indexEntry->key);
numKeys++;
isFirstEntry = false;
prevIndexKeyString.swap(indexKeyString);
}
- if (_indexConsistency->getMultikeyMetadataPathCount(indexNumber) > 0) {
+ if (results && _indexConsistency->getMultikeyMetadataPathCount(indexNumber) > 0) {
results->errors.push_back(
str::stream() << "Index '" << descriptor->indexName()
<< "' has one or more missing multikey metadata index keys");
results->valid = false;
}
- *numTraversedKeys = numKeys;
+ if (numTraversedKeys) {
+ *numTraversedKeys = numKeys;
+ }
}
void RecordStoreValidateAdaptor::traverseRecordStore(RecordStore* recordStore,
@@ -261,8 +267,8 @@ void RecordStoreValidateAdaptor::traverseRecordStore(RecordStore* recordStore,
void RecordStoreValidateAdaptor::validateIndexKeyCount(const IndexDescriptor* idx,
int64_t numRecs,
ValidateResults& results) {
- const std::string indexNs = idx->indexNamespace();
- int indexNumber = _indexConsistency->getIndexNumber(indexNs);
+ const std::string indexName = idx->indexName();
+ int indexNumber = _indexConsistency->getIndexNumber(indexName);
int64_t numIndexedKeys = _indexConsistency->getNumKeys(indexNumber);
int64_t numLongKeys = _indexConsistency->getNumLongKeys(indexNumber);
auto totalKeys = numLongKeys + numIndexedKeys;
diff --git a/src/mongo/db/commands/validate.cpp b/src/mongo/db/commands/validate.cpp
index df8abe7b222..a15d7c7b46b 100644
--- a/src/mongo/db/commands/validate.cpp
+++ b/src/mongo/db/commands/validate.cpp
@@ -186,6 +186,8 @@ public:
result.appendBool("valid", results.valid);
result.append("warnings", results.warnings);
result.append("errors", results.errors);
+ result.append("extraIndexEntries", results.extraIndexEntries);
+ result.append("missingIndexEntries", results.missingIndexEntries);
if (!results.valid) {
result.append("advice",
diff --git a/src/mongo/db/storage/record_store.h b/src/mongo/db/storage/record_store.h
index 5389e8f6439..3dce47699cf 100644
--- a/src/mongo/db/storage/record_store.h
+++ b/src/mongo/db/storage/record_store.h
@@ -610,6 +610,8 @@ struct ValidateResults {
bool valid;
std::vector<std::string> errors;
std::vector<std::string> warnings;
+ std::vector<BSONObj> extraIndexEntries;
+ std::vector<BSONObj> missingIndexEntries;
};
/**