diff options
author | Mohammad Dashti <mdashti@gmail.com> | 2023-01-09 22:29:03 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-01-09 23:26:58 +0000 |
commit | 04d0423767ad5a845805e4ba37cbcd68648bb516 (patch) | |
tree | 4bb2b31b8044e1ab7c659b2f3bef30addb46e107 | |
parent | 1d1514b2aa235695f1b7649db620f986e16c6690 (diff) | |
download | mongo-04d0423767ad5a845805e4ba37cbcd68648bb516.tar.gz |
SERVER-71560 Columnstore index repair
Co-authored-by: Mohammad Dashti <mdashti@gmail.com>
-rw-r--r-- | src/mongo/db/catalog/collection_validation_test.cpp | 161 | ||||
-rw-r--r-- | src/mongo/db/catalog/column_index_consistency.cpp | 85 | ||||
-rw-r--r-- | src/mongo/db/catalog/column_index_consistency.h | 11 | ||||
-rw-r--r-- | src/mongo/db/catalog/index_consistency.cpp | 1 |
4 files changed, 219 insertions, 39 deletions
diff --git a/src/mongo/db/catalog/collection_validation_test.cpp b/src/mongo/db/catalog/collection_validation_test.cpp index 19cfe013d13..a8a91339ae6 100644 --- a/src/mongo/db/catalog/collection_validation_test.cpp +++ b/src/mongo/db/catalog/collection_validation_test.cpp @@ -240,6 +240,15 @@ int setUpInvalidData(OperationContext* opCtx) { return 1; } +/** + * Convenience function to convert ValidateResults to a BSON object. + */ +BSONObj resultToBSON(const ValidateResults& vr) { + BSONObjBuilder builder; + vr.appendToResultObj(&builder, true /* debugging */); + return builder.obj(); +} + class CollectionValidationColumnStoreIndexTest : public CollectionValidationDiskTest { protected: CollectionValidationColumnStoreIndexTest() : CollectionValidationDiskTest() {} @@ -520,6 +529,74 @@ protected: return results; }); } + + /** + * This method repairs the (possible) index corruptions by running the validate command and + * returns the results from this command (after doing some common assertions). In addition, + * before returning this result, another call to the validate command is done to repair the + * index again. It's expected that this second call to index repair will not lead to any repair, + * as it's done in the first call (if any corruption exists). + */ + std::pair<BSONObj, ValidateResults> repairIndexCorruptions(const NamespaceString& nss, + int numDocs) { + auto opCtx = operationContext(); + const auto repairResults1 = + foregroundValidate(nss, + opCtx, + /*valid*/ true, + /*numRecords*/ numDocs, + /*numInvalidDocuments*/ 0, + /*numErrors*/ 0, + {CollectionValidation::ValidateMode::kForeground}, + CollectionValidation::RepairMode::kFixErrors); + + ASSERT_EQ(repairResults1.size(), 1); + + const auto& repairResult1 = repairResults1[0]; + const auto& validateResults1 = repairResult1.second; + auto obj = resultToBSON(validateResults1); + ASSERT(validateResults1.valid) << obj; + + ASSERT_EQ(validateResults1.missingIndexEntries.size(), 0U) << obj; + ASSERT_EQ(validateResults1.extraIndexEntries.size(), 0U) << obj; + ASSERT_EQ(validateResults1.corruptRecords.size(), 0U) << obj; + ASSERT_EQ(validateResults1.numRemovedCorruptRecords, 0U) << obj; + ASSERT_EQ(validateResults1.numDocumentsMovedToLostAndFound, 0U) << obj; + ASSERT_EQ(validateResults1.numOutdatedMissingIndexEntry, 0U) << obj; + + // After the first round of repair, if we do a second round of repair, it should always + // validate without requiring any actual repair anymore. + const auto repairResults2 = + foregroundValidate(nss, + opCtx, + /*valid*/ true, + /*numRecords*/ numDocs, + /*numInvalidDocuments*/ 0, + /*numErrors*/ 0, + {CollectionValidation::ValidateMode::kForeground}, + CollectionValidation::RepairMode::kFixErrors); + ASSERT_EQ(repairResults2.size(), 1); + + const auto& validateResults2 = repairResults2[0].second; + obj = resultToBSON(validateResults2); + ASSERT(validateResults2.valid) << obj; + ASSERT_FALSE(validateResults2.repaired) << obj; + + const auto warningsWithoutTransientErrors = omitTransientWarnings(validateResults2); + ASSERT_EQ(warningsWithoutTransientErrors.warnings.size(), 0U) << obj; + ASSERT_EQ(validateResults2.missingIndexEntries.size(), 0U) << obj; + ASSERT_EQ(validateResults2.extraIndexEntries.size(), 0U) << obj; + ASSERT_EQ(validateResults2.corruptRecords.size(), 0U) << obj; + ASSERT_EQ(validateResults2.numRemovedCorruptRecords, 0U) << obj; + ASSERT_EQ(validateResults2.numRemovedExtraIndexEntries, 0U) << obj; + ASSERT_EQ(validateResults2.numInsertedMissingIndexEntries, 0U) << obj; + ASSERT_EQ(validateResults2.numDocumentsMovedToLostAndFound, 0U) << obj; + ASSERT_EQ(validateResults2.numOutdatedMissingIndexEntry, 0U) << obj; + + // return the result of initial repair command, so that tests can do further checks on that + // result if needed. + return repairResult1; + } }; // Verify that calling validate() on an empty collection with different validation levels returns an @@ -760,10 +837,7 @@ TEST_F(CollectionValidationTest, ValidateOldUniqueIndexKeyWarning) { for (const auto& result : results) { const auto& validateResults = result.second; - BSONObjBuilder builder; - const bool debugging = true; - validateResults.appendToResultObj(&builder, debugging); - const auto obj = builder.obj(); + const auto obj = resultToBSON(validateResults); ASSERT(validateResults.valid) << obj; const auto warningsWithoutTransientErrors = omitTransientWarnings(validateResults); ASSERT_EQ(warningsWithoutTransientErrors.warnings.size(), 1U) << obj; @@ -816,10 +890,7 @@ TEST_F(CollectionValidationColumnStoreIndexTest, SingleInvalidIndexEntryCSI) { for (const auto& result : results) { const auto& validateResults = result.second; - BSONObjBuilder builder; - const bool debugging = true; - validateResults.appendToResultObj(&builder, debugging); - const auto obj = builder.obj(); + const auto obj = resultToBSON(validateResults); ASSERT_FALSE(validateResults.valid) << obj; const auto warningsWithoutTransientErrors = @@ -871,6 +942,21 @@ TEST_F(CollectionValidationColumnStoreIndexTest, SingleInvalidIndexEntryCSI) { ASSERT_EQ(validateResults.numDocumentsMovedToLostAndFound, 0U) << obj; ASSERT_EQ(validateResults.numOutdatedMissingIndexEntry, 0U) << obj; } + + const auto& repairResult = repairIndexCorruptions(nss, numDocs); + const auto& validateResults = repairResult.second; + const auto obj = resultToBSON(validateResults); + ASSERT(validateResults.valid) << obj; + ASSERT(validateResults.repaired) << obj; + + const auto warningsWithoutTransientErrors = + omitTransientWarnings(validateResults); + ASSERT_EQ(warningsWithoutTransientErrors.warnings.size(), 1U) << obj; + ASSERT_STRING_CONTAINS(warningsWithoutTransientErrors.warnings[0], + "Inserted 1 missing index entries.") + << obj; + ASSERT_EQ(validateResults.numRemovedExtraIndexEntries, 1U) << obj; + ASSERT_EQ(validateResults.numInsertedMissingIndexEntries, 1U) << obj; } } } @@ -926,10 +1012,7 @@ TEST_F(CollectionValidationColumnStoreIndexTest, SingleExtraIndexEntry) { for (const auto& result : results) { const auto& validateResults = result.second; - BSONObjBuilder builder; - const bool debugging = true; - validateResults.appendToResultObj(&builder, debugging); - const auto obj = builder.obj(); + const auto obj = resultToBSON(validateResults); ASSERT_FALSE(validateResults.valid) << obj; const auto warningsWithoutTransientErrors = @@ -964,6 +1047,19 @@ TEST_F(CollectionValidationColumnStoreIndexTest, SingleExtraIndexEntry) { ASSERT_EQ(validateResults.numDocumentsMovedToLostAndFound, 0U) << obj; ASSERT_EQ(validateResults.numOutdatedMissingIndexEntry, 0U) << obj; } + + { + const auto& repairResult = repairIndexCorruptions(nss, numDocs); + const auto& validateResults = repairResult.second; + const auto obj = resultToBSON(validateResults); + ASSERT(validateResults.repaired) << obj; + + const auto warningsWithoutTransientErrors = + omitTransientWarnings(validateResults); + ASSERT_EQ(warningsWithoutTransientErrors.warnings.size(), 0U) << obj; + ASSERT_EQ(validateResults.numRemovedExtraIndexEntries, 1U) << obj; + ASSERT_EQ(validateResults.numInsertedMissingIndexEntries, 0U) << obj; + } } } } @@ -1002,10 +1098,7 @@ TEST_F(CollectionValidationColumnStoreIndexTest, SingleMissingIndexEntryCSI) { for (const auto& result : results) { const auto& validateResults = result.second; - BSONObjBuilder builder; - const bool debugging = true; - validateResults.appendToResultObj(&builder, debugging); - const auto obj = builder.obj(); + const auto obj = resultToBSON(validateResults); ASSERT_FALSE(validateResults.valid) << obj; const auto warningsWithoutTransientErrors = @@ -1042,6 +1135,22 @@ TEST_F(CollectionValidationColumnStoreIndexTest, SingleMissingIndexEntryCSI) { ASSERT_EQ(validateResults.numDocumentsMovedToLostAndFound, 0U) << obj; ASSERT_EQ(validateResults.numOutdatedMissingIndexEntry, 0U) << obj; } + + { + const auto& repairResult = repairIndexCorruptions(nss, numDocs); + const auto& validateResults = repairResult.second; + const auto obj = resultToBSON(validateResults); + ASSERT(validateResults.repaired) << obj; + + const auto warningsWithoutTransientErrors = + omitTransientWarnings(validateResults); + ASSERT_EQ(warningsWithoutTransientErrors.warnings.size(), 1U) << obj; + ASSERT_STRING_CONTAINS(warningsWithoutTransientErrors.warnings[0], + "Inserted 1 missing index entries.") + << obj; + ASSERT_EQ(validateResults.numRemovedExtraIndexEntries, 0U) << obj; + ASSERT_EQ(validateResults.numInsertedMissingIndexEntries, 1U) << obj; + } } } } @@ -1090,10 +1199,7 @@ TEST_F(CollectionValidationColumnStoreIndexTest, MultipleInvalidIndexEntryCSI) { for (const auto& result : results) { const auto& validateResults = result.second; - BSONObjBuilder builder; - const bool debugging = true; - validateResults.appendToResultObj(&builder, debugging); - const auto obj = builder.obj(); + const auto obj = resultToBSON(validateResults); ASSERT_FALSE(validateResults.valid) << obj; const auto warningsWithoutTransientErrors = omitTransientWarnings(validateResults); @@ -1119,6 +1225,21 @@ TEST_F(CollectionValidationColumnStoreIndexTest, MultipleInvalidIndexEntryCSI) { ASSERT_EQ(validateResults.numDocumentsMovedToLostAndFound, 0U) << obj; ASSERT_EQ(validateResults.numOutdatedMissingIndexEntry, 0U) << obj; } + + { + const auto& repairResult = repairIndexCorruptions(kNss, numDocs); + const auto& validateResults = repairResult.second; + const auto obj = resultToBSON(validateResults); + ASSERT(validateResults.repaired) << obj; + + const auto warningsWithoutTransientErrors = omitTransientWarnings(validateResults); + ASSERT_EQ(warningsWithoutTransientErrors.warnings.size(), 1U) << obj; + ASSERT_STRING_CONTAINS(warningsWithoutTransientErrors.warnings[0], + "Inserted 3 missing index entries.") + << obj; + ASSERT_EQ(validateResults.numRemovedExtraIndexEntries, 5U) << obj; + ASSERT_EQ(validateResults.numInsertedMissingIndexEntries, 3U) << obj; + } } } // namespace diff --git a/src/mongo/db/catalog/column_index_consistency.cpp b/src/mongo/db/catalog/column_index_consistency.cpp index dfa6d65b7e6..6739ab316a9 100644 --- a/src/mongo/db/catalog/column_index_consistency.cpp +++ b/src/mongo/db/catalog/column_index_consistency.cpp @@ -67,6 +67,8 @@ int64_t ColumnIndexConsistency::traverseIndex(OperationContext* opCtx, } } + _investigateSuspects(opCtx, index); + return numIndexEntries; } @@ -76,7 +78,7 @@ void ColumnIndexConsistency::traverseRecord(OperationContext* opCtx, const RecordId& recordId, const BSONObj& record, ValidateResults* results) { - ColumnStoreAccessMethod* csam = dynamic_cast<ColumnStoreAccessMethod*>(index->accessMethod()); + ColumnStoreAccessMethod* csam = checked_cast<ColumnStoreAccessMethod*>(index->accessMethod()); // Shred the record. csam->getKeyGen().visitCellsForInsert( @@ -95,13 +97,8 @@ void ColumnIndexConsistency::traverseRecord(OperationContext* opCtx, }); } -void ColumnIndexConsistency::_addIndexEntryErrors(OperationContext* opCtx, - const IndexCatalogEntry* index, - ValidateResults* results) { - - if (_firstPhase) { - return; - } +void ColumnIndexConsistency::_investigateSuspects(OperationContext* opCtx, + const IndexCatalogEntry* index) { const auto indexName = index->descriptor()->indexName(); @@ -112,7 +109,7 @@ void ColumnIndexConsistency::_addIndexEntryErrors(OperationContext* opCtx, const auto& csiCursor = csiCursorIt->second; const ColumnStoreAccessMethod* csam = - dynamic_cast<ColumnStoreAccessMethod*>(index->accessMethod()); + checked_cast<ColumnStoreAccessMethod*>(index->accessMethod()); for (const auto rowId : _suspects) { // Gather all paths for this RecordId. @@ -147,16 +144,10 @@ void ColumnIndexConsistency::_addIndexEntryErrors(OperationContext* opCtx, if (!csiCell) { // Rowstore has entry that index doesn't _missingIndexEntries.emplace_back(rowCell); - results->missingIndexEntries.push_back( - _generateInfo(csam->indexName(), recordId, path, rowId)); } else if (csiCell->value != rowCell.value) { // Rowstore and index values diverge _extraIndexEntries.emplace_back(csiCell.get()); _missingIndexEntries.emplace_back(rowCell); - results->missingIndexEntries.push_back(_generateInfo( - csam->indexName(), recordId, path, rowId, csiCell->value)); - results->extraIndexEntries.push_back( - _generateInfo(csam->indexName(), recordId, path, rowId, rowCell.value)); } if (paths.count(rowCell.path) == 1) { paths.erase(rowCell.path); @@ -170,11 +161,21 @@ void ColumnIndexConsistency::_addIndexEntryErrors(OperationContext* opCtx, for (const auto& kv : paths) { for (int i = 0; i < kv.second; i++) { _extraIndexEntries.emplace_back(kv.first, rowId, ""_sd); - results->extraIndexEntries.push_back( - _generateInfo(csam->indexName(), recordId, kv.first, rowId)); } } } +} + + +void ColumnIndexConsistency::_addIndexEntryErrors(OperationContext* opCtx, + const IndexCatalogEntry* index, + ValidateResults* results) { + + if (_firstPhase) { + return; + } + + ColumnStoreAccessMethod* csam = checked_cast<ColumnStoreAccessMethod*>(index->accessMethod()); if (!_missingIndexEntries.empty() || !_extraIndexEntries.empty()) { StringBuilder ss; @@ -187,12 +188,20 @@ void ColumnIndexConsistency::_addIndexEntryErrors(OperationContext* opCtx, ss << "Detected " << _missingIndexEntries.size() << " missing index entries."; results->warnings.push_back(ss.str()); results->valid = false; + for (const auto& ent : _missingIndexEntries) { + results->missingIndexEntries.push_back( + _generateInfo(csam->indexName(), RecordId(ent.rid), ent.path, ent.rid, ent.value)); + } } if (!_extraIndexEntries.empty()) { StringBuilder ss; ss << "Detected " << _extraIndexEntries.size() << " extra index entries."; results->warnings.push_back(ss.str()); results->valid = false; + for (const auto& ent : _extraIndexEntries) { + results->extraIndexEntries.push_back( + _generateInfo(csam->indexName(), RecordId(ent.rid), ent.path, ent.rid, ent.value)); + } } } @@ -211,8 +220,48 @@ void ColumnIndexConsistency::addIndexEntryErrors(OperationContext* opCtx, } } +void ColumnIndexConsistency::repairIndexEntries(OperationContext* opCtx, + const IndexCatalogEntry* index, + ValidateResults* results) { + + ColumnStoreAccessMethod* csam = checked_cast<ColumnStoreAccessMethod*>(index->accessMethod()); + + writeConflictRetry(opCtx, "removingExtraColumnIndexEntries", _validateState->nss().ns(), [&] { + WriteUnitOfWork wunit(opCtx); + auto& indexResults = results->indexResultsMap[csam->indexName()]; + auto cursor = csam->writableStorage()->newWriteCursor(opCtx); + + for (auto it = _extraIndexEntries.begin(); it != _extraIndexEntries.end();) { + cursor->remove(it->path, it->rid); + results->numRemovedExtraIndexEntries++; + indexResults.keysTraversed--; + it = _extraIndexEntries.erase(it); + } + + for (auto it = _missingIndexEntries.begin(); it != _missingIndexEntries.end();) { + cursor->insert(it->path, it->rid, it->value); + results->numInsertedMissingIndexEntries++; + indexResults.keysTraversed++; + it = _missingIndexEntries.erase(it); + } + wunit.commit(); + }); + + results->repaired = true; +} + void ColumnIndexConsistency::repairIndexEntries(OperationContext* opCtx, ValidateResults* results) { - // TODO SERVER-71560 Repair should be implemented. + int numColumnStoreIndexes = 0; + for (const auto& index : _validateState->getIndexes()) { + const IndexDescriptor* descriptor = index->descriptor(); + if (descriptor->getAccessMethodName() == IndexNames::COLUMN) { + ++numColumnStoreIndexes; + uassert(7106123, + "The current implementation only supports a single column-store index.", + numColumnStoreIndexes <= 1); + repairIndexEntries(opCtx, index.get(), results); + } + } } void ColumnIndexConsistency::validateIndexKeyCount(OperationContext* opCtx, diff --git a/src/mongo/db/catalog/column_index_consistency.h b/src/mongo/db/catalog/column_index_consistency.h index a503e8c7154..defe030ba98 100644 --- a/src/mongo/db/catalog/column_index_consistency.h +++ b/src/mongo/db/catalog/column_index_consistency.h @@ -92,7 +92,14 @@ public: } /** - * If repair mode enabled, try inserting _missingIndexEntries into indexes. + * If repair mode enabled, tries to repair the given column-store index. + */ + void repairIndexEntries(OperationContext* opCtx, + const IndexCatalogEntry* index, + ValidateResults* results); + + /** + * If repair mode enabled, tries to repair the column-store indexes. */ void repairIndexEntries(OperationContext* opCtx, ValidateResults* results); @@ -180,6 +187,8 @@ private: const IndexCatalogEntry* index, ValidateResults* results); + void _investigateSuspects(OperationContext* opCtx, const IndexCatalogEntry* index); + void _tabulateEntry(const FullCellView& cell, int step); BSONObj _generateInfo(const std::string& indexName, diff --git a/src/mongo/db/catalog/index_consistency.cpp b/src/mongo/db/catalog/index_consistency.cpp index f16bb7ebfa3..cf8409499b6 100644 --- a/src/mongo/db/catalog/index_consistency.cpp +++ b/src/mongo/db/catalog/index_consistency.cpp @@ -432,6 +432,7 @@ void KeyStringIndexConsistency::addIndexKey(OperationContext* opCtx, results->numRemovedExtraIndexEntries += numDeleted; results->repaired = true; indexInfo->numKeys--; + _extraIndexEntries.erase(key); return; } |