summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMohammad Dashti <mdashti@gmail.com>2023-01-09 22:29:03 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2023-01-09 23:26:58 +0000
commit04d0423767ad5a845805e4ba37cbcd68648bb516 (patch)
tree4bb2b31b8044e1ab7c659b2f3bef30addb46e107
parent1d1514b2aa235695f1b7649db620f986e16c6690 (diff)
downloadmongo-04d0423767ad5a845805e4ba37cbcd68648bb516.tar.gz
SERVER-71560 Columnstore index repair
Co-authored-by: Mohammad Dashti <mdashti@gmail.com>
-rw-r--r--src/mongo/db/catalog/collection_validation_test.cpp161
-rw-r--r--src/mongo/db/catalog/column_index_consistency.cpp85
-rw-r--r--src/mongo/db/catalog/column_index_consistency.h11
-rw-r--r--src/mongo/db/catalog/index_consistency.cpp1
4 files changed, 219 insertions, 39 deletions
diff --git a/src/mongo/db/catalog/collection_validation_test.cpp b/src/mongo/db/catalog/collection_validation_test.cpp
index 19cfe013d13..a8a91339ae6 100644
--- a/src/mongo/db/catalog/collection_validation_test.cpp
+++ b/src/mongo/db/catalog/collection_validation_test.cpp
@@ -240,6 +240,15 @@ int setUpInvalidData(OperationContext* opCtx) {
return 1;
}
+/**
+ * Convenience function to convert ValidateResults to a BSON object.
+ */
+BSONObj resultToBSON(const ValidateResults& vr) {
+ BSONObjBuilder builder;
+ vr.appendToResultObj(&builder, true /* debugging */);
+ return builder.obj();
+}
+
class CollectionValidationColumnStoreIndexTest : public CollectionValidationDiskTest {
protected:
CollectionValidationColumnStoreIndexTest() : CollectionValidationDiskTest() {}
@@ -520,6 +529,74 @@ protected:
return results;
});
}
+
+ /**
+ * This method repairs the (possible) index corruptions by running the validate command and
+ * returns the results from this command (after doing some common assertions). In addition,
+ * before returning this result, another call to the validate command is done to repair the
+ * index again. It's expected that this second call to index repair will not lead to any repair,
+ * as it's done in the first call (if any corruption exists).
+ */
+ std::pair<BSONObj, ValidateResults> repairIndexCorruptions(const NamespaceString& nss,
+ int numDocs) {
+ auto opCtx = operationContext();
+ const auto repairResults1 =
+ foregroundValidate(nss,
+ opCtx,
+ /*valid*/ true,
+ /*numRecords*/ numDocs,
+ /*numInvalidDocuments*/ 0,
+ /*numErrors*/ 0,
+ {CollectionValidation::ValidateMode::kForeground},
+ CollectionValidation::RepairMode::kFixErrors);
+
+ ASSERT_EQ(repairResults1.size(), 1);
+
+ const auto& repairResult1 = repairResults1[0];
+ const auto& validateResults1 = repairResult1.second;
+ auto obj = resultToBSON(validateResults1);
+ ASSERT(validateResults1.valid) << obj;
+
+ ASSERT_EQ(validateResults1.missingIndexEntries.size(), 0U) << obj;
+ ASSERT_EQ(validateResults1.extraIndexEntries.size(), 0U) << obj;
+ ASSERT_EQ(validateResults1.corruptRecords.size(), 0U) << obj;
+ ASSERT_EQ(validateResults1.numRemovedCorruptRecords, 0U) << obj;
+ ASSERT_EQ(validateResults1.numDocumentsMovedToLostAndFound, 0U) << obj;
+ ASSERT_EQ(validateResults1.numOutdatedMissingIndexEntry, 0U) << obj;
+
+ // After the first round of repair, if we do a second round of repair, it should always
+ // validate without requiring any actual repair anymore.
+ const auto repairResults2 =
+ foregroundValidate(nss,
+ opCtx,
+ /*valid*/ true,
+ /*numRecords*/ numDocs,
+ /*numInvalidDocuments*/ 0,
+ /*numErrors*/ 0,
+ {CollectionValidation::ValidateMode::kForeground},
+ CollectionValidation::RepairMode::kFixErrors);
+ ASSERT_EQ(repairResults2.size(), 1);
+
+ const auto& validateResults2 = repairResults2[0].second;
+ obj = resultToBSON(validateResults2);
+ ASSERT(validateResults2.valid) << obj;
+ ASSERT_FALSE(validateResults2.repaired) << obj;
+
+ const auto warningsWithoutTransientErrors = omitTransientWarnings(validateResults2);
+ ASSERT_EQ(warningsWithoutTransientErrors.warnings.size(), 0U) << obj;
+ ASSERT_EQ(validateResults2.missingIndexEntries.size(), 0U) << obj;
+ ASSERT_EQ(validateResults2.extraIndexEntries.size(), 0U) << obj;
+ ASSERT_EQ(validateResults2.corruptRecords.size(), 0U) << obj;
+ ASSERT_EQ(validateResults2.numRemovedCorruptRecords, 0U) << obj;
+ ASSERT_EQ(validateResults2.numRemovedExtraIndexEntries, 0U) << obj;
+ ASSERT_EQ(validateResults2.numInsertedMissingIndexEntries, 0U) << obj;
+ ASSERT_EQ(validateResults2.numDocumentsMovedToLostAndFound, 0U) << obj;
+ ASSERT_EQ(validateResults2.numOutdatedMissingIndexEntry, 0U) << obj;
+
+ // return the result of initial repair command, so that tests can do further checks on that
+ // result if needed.
+ return repairResult1;
+ }
};
// Verify that calling validate() on an empty collection with different validation levels returns an
@@ -760,10 +837,7 @@ TEST_F(CollectionValidationTest, ValidateOldUniqueIndexKeyWarning) {
for (const auto& result : results) {
const auto& validateResults = result.second;
- BSONObjBuilder builder;
- const bool debugging = true;
- validateResults.appendToResultObj(&builder, debugging);
- const auto obj = builder.obj();
+ const auto obj = resultToBSON(validateResults);
ASSERT(validateResults.valid) << obj;
const auto warningsWithoutTransientErrors = omitTransientWarnings(validateResults);
ASSERT_EQ(warningsWithoutTransientErrors.warnings.size(), 1U) << obj;
@@ -816,10 +890,7 @@ TEST_F(CollectionValidationColumnStoreIndexTest, SingleInvalidIndexEntryCSI) {
for (const auto& result : results) {
const auto& validateResults = result.second;
- BSONObjBuilder builder;
- const bool debugging = true;
- validateResults.appendToResultObj(&builder, debugging);
- const auto obj = builder.obj();
+ const auto obj = resultToBSON(validateResults);
ASSERT_FALSE(validateResults.valid) << obj;
const auto warningsWithoutTransientErrors =
@@ -871,6 +942,21 @@ TEST_F(CollectionValidationColumnStoreIndexTest, SingleInvalidIndexEntryCSI) {
ASSERT_EQ(validateResults.numDocumentsMovedToLostAndFound, 0U) << obj;
ASSERT_EQ(validateResults.numOutdatedMissingIndexEntry, 0U) << obj;
}
+
+ const auto& repairResult = repairIndexCorruptions(nss, numDocs);
+ const auto& validateResults = repairResult.second;
+ const auto obj = resultToBSON(validateResults);
+ ASSERT(validateResults.valid) << obj;
+ ASSERT(validateResults.repaired) << obj;
+
+ const auto warningsWithoutTransientErrors =
+ omitTransientWarnings(validateResults);
+ ASSERT_EQ(warningsWithoutTransientErrors.warnings.size(), 1U) << obj;
+ ASSERT_STRING_CONTAINS(warningsWithoutTransientErrors.warnings[0],
+ "Inserted 1 missing index entries.")
+ << obj;
+ ASSERT_EQ(validateResults.numRemovedExtraIndexEntries, 1U) << obj;
+ ASSERT_EQ(validateResults.numInsertedMissingIndexEntries, 1U) << obj;
}
}
}
@@ -926,10 +1012,7 @@ TEST_F(CollectionValidationColumnStoreIndexTest, SingleExtraIndexEntry) {
for (const auto& result : results) {
const auto& validateResults = result.second;
- BSONObjBuilder builder;
- const bool debugging = true;
- validateResults.appendToResultObj(&builder, debugging);
- const auto obj = builder.obj();
+ const auto obj = resultToBSON(validateResults);
ASSERT_FALSE(validateResults.valid) << obj;
const auto warningsWithoutTransientErrors =
@@ -964,6 +1047,19 @@ TEST_F(CollectionValidationColumnStoreIndexTest, SingleExtraIndexEntry) {
ASSERT_EQ(validateResults.numDocumentsMovedToLostAndFound, 0U) << obj;
ASSERT_EQ(validateResults.numOutdatedMissingIndexEntry, 0U) << obj;
}
+
+ {
+ const auto& repairResult = repairIndexCorruptions(nss, numDocs);
+ const auto& validateResults = repairResult.second;
+ const auto obj = resultToBSON(validateResults);
+ ASSERT(validateResults.repaired) << obj;
+
+ const auto warningsWithoutTransientErrors =
+ omitTransientWarnings(validateResults);
+ ASSERT_EQ(warningsWithoutTransientErrors.warnings.size(), 0U) << obj;
+ ASSERT_EQ(validateResults.numRemovedExtraIndexEntries, 1U) << obj;
+ ASSERT_EQ(validateResults.numInsertedMissingIndexEntries, 0U) << obj;
+ }
}
}
}
@@ -1002,10 +1098,7 @@ TEST_F(CollectionValidationColumnStoreIndexTest, SingleMissingIndexEntryCSI) {
for (const auto& result : results) {
const auto& validateResults = result.second;
- BSONObjBuilder builder;
- const bool debugging = true;
- validateResults.appendToResultObj(&builder, debugging);
- const auto obj = builder.obj();
+ const auto obj = resultToBSON(validateResults);
ASSERT_FALSE(validateResults.valid) << obj;
const auto warningsWithoutTransientErrors =
@@ -1042,6 +1135,22 @@ TEST_F(CollectionValidationColumnStoreIndexTest, SingleMissingIndexEntryCSI) {
ASSERT_EQ(validateResults.numDocumentsMovedToLostAndFound, 0U) << obj;
ASSERT_EQ(validateResults.numOutdatedMissingIndexEntry, 0U) << obj;
}
+
+ {
+ const auto& repairResult = repairIndexCorruptions(nss, numDocs);
+ const auto& validateResults = repairResult.second;
+ const auto obj = resultToBSON(validateResults);
+ ASSERT(validateResults.repaired) << obj;
+
+ const auto warningsWithoutTransientErrors =
+ omitTransientWarnings(validateResults);
+ ASSERT_EQ(warningsWithoutTransientErrors.warnings.size(), 1U) << obj;
+ ASSERT_STRING_CONTAINS(warningsWithoutTransientErrors.warnings[0],
+ "Inserted 1 missing index entries.")
+ << obj;
+ ASSERT_EQ(validateResults.numRemovedExtraIndexEntries, 0U) << obj;
+ ASSERT_EQ(validateResults.numInsertedMissingIndexEntries, 1U) << obj;
+ }
}
}
}
@@ -1090,10 +1199,7 @@ TEST_F(CollectionValidationColumnStoreIndexTest, MultipleInvalidIndexEntryCSI) {
for (const auto& result : results) {
const auto& validateResults = result.second;
- BSONObjBuilder builder;
- const bool debugging = true;
- validateResults.appendToResultObj(&builder, debugging);
- const auto obj = builder.obj();
+ const auto obj = resultToBSON(validateResults);
ASSERT_FALSE(validateResults.valid) << obj;
const auto warningsWithoutTransientErrors = omitTransientWarnings(validateResults);
@@ -1119,6 +1225,21 @@ TEST_F(CollectionValidationColumnStoreIndexTest, MultipleInvalidIndexEntryCSI) {
ASSERT_EQ(validateResults.numDocumentsMovedToLostAndFound, 0U) << obj;
ASSERT_EQ(validateResults.numOutdatedMissingIndexEntry, 0U) << obj;
}
+
+ {
+ const auto& repairResult = repairIndexCorruptions(kNss, numDocs);
+ const auto& validateResults = repairResult.second;
+ const auto obj = resultToBSON(validateResults);
+ ASSERT(validateResults.repaired) << obj;
+
+ const auto warningsWithoutTransientErrors = omitTransientWarnings(validateResults);
+ ASSERT_EQ(warningsWithoutTransientErrors.warnings.size(), 1U) << obj;
+ ASSERT_STRING_CONTAINS(warningsWithoutTransientErrors.warnings[0],
+ "Inserted 3 missing index entries.")
+ << obj;
+ ASSERT_EQ(validateResults.numRemovedExtraIndexEntries, 5U) << obj;
+ ASSERT_EQ(validateResults.numInsertedMissingIndexEntries, 3U) << obj;
+ }
}
} // namespace
diff --git a/src/mongo/db/catalog/column_index_consistency.cpp b/src/mongo/db/catalog/column_index_consistency.cpp
index dfa6d65b7e6..6739ab316a9 100644
--- a/src/mongo/db/catalog/column_index_consistency.cpp
+++ b/src/mongo/db/catalog/column_index_consistency.cpp
@@ -67,6 +67,8 @@ int64_t ColumnIndexConsistency::traverseIndex(OperationContext* opCtx,
}
}
+ _investigateSuspects(opCtx, index);
+
return numIndexEntries;
}
@@ -76,7 +78,7 @@ void ColumnIndexConsistency::traverseRecord(OperationContext* opCtx,
const RecordId& recordId,
const BSONObj& record,
ValidateResults* results) {
- ColumnStoreAccessMethod* csam = dynamic_cast<ColumnStoreAccessMethod*>(index->accessMethod());
+ ColumnStoreAccessMethod* csam = checked_cast<ColumnStoreAccessMethod*>(index->accessMethod());
// Shred the record.
csam->getKeyGen().visitCellsForInsert(
@@ -95,13 +97,8 @@ void ColumnIndexConsistency::traverseRecord(OperationContext* opCtx,
});
}
-void ColumnIndexConsistency::_addIndexEntryErrors(OperationContext* opCtx,
- const IndexCatalogEntry* index,
- ValidateResults* results) {
-
- if (_firstPhase) {
- return;
- }
+void ColumnIndexConsistency::_investigateSuspects(OperationContext* opCtx,
+ const IndexCatalogEntry* index) {
const auto indexName = index->descriptor()->indexName();
@@ -112,7 +109,7 @@ void ColumnIndexConsistency::_addIndexEntryErrors(OperationContext* opCtx,
const auto& csiCursor = csiCursorIt->second;
const ColumnStoreAccessMethod* csam =
- dynamic_cast<ColumnStoreAccessMethod*>(index->accessMethod());
+ checked_cast<ColumnStoreAccessMethod*>(index->accessMethod());
for (const auto rowId : _suspects) {
// Gather all paths for this RecordId.
@@ -147,16 +144,10 @@ void ColumnIndexConsistency::_addIndexEntryErrors(OperationContext* opCtx,
if (!csiCell) {
// Rowstore has entry that index doesn't
_missingIndexEntries.emplace_back(rowCell);
- results->missingIndexEntries.push_back(
- _generateInfo(csam->indexName(), recordId, path, rowId));
} else if (csiCell->value != rowCell.value) {
// Rowstore and index values diverge
_extraIndexEntries.emplace_back(csiCell.get());
_missingIndexEntries.emplace_back(rowCell);
- results->missingIndexEntries.push_back(_generateInfo(
- csam->indexName(), recordId, path, rowId, csiCell->value));
- results->extraIndexEntries.push_back(
- _generateInfo(csam->indexName(), recordId, path, rowId, rowCell.value));
}
if (paths.count(rowCell.path) == 1) {
paths.erase(rowCell.path);
@@ -170,11 +161,21 @@ void ColumnIndexConsistency::_addIndexEntryErrors(OperationContext* opCtx,
for (const auto& kv : paths) {
for (int i = 0; i < kv.second; i++) {
_extraIndexEntries.emplace_back(kv.first, rowId, ""_sd);
- results->extraIndexEntries.push_back(
- _generateInfo(csam->indexName(), recordId, kv.first, rowId));
}
}
}
+}
+
+
+void ColumnIndexConsistency::_addIndexEntryErrors(OperationContext* opCtx,
+ const IndexCatalogEntry* index,
+ ValidateResults* results) {
+
+ if (_firstPhase) {
+ return;
+ }
+
+ ColumnStoreAccessMethod* csam = checked_cast<ColumnStoreAccessMethod*>(index->accessMethod());
if (!_missingIndexEntries.empty() || !_extraIndexEntries.empty()) {
StringBuilder ss;
@@ -187,12 +188,20 @@ void ColumnIndexConsistency::_addIndexEntryErrors(OperationContext* opCtx,
ss << "Detected " << _missingIndexEntries.size() << " missing index entries.";
results->warnings.push_back(ss.str());
results->valid = false;
+ for (const auto& ent : _missingIndexEntries) {
+ results->missingIndexEntries.push_back(
+ _generateInfo(csam->indexName(), RecordId(ent.rid), ent.path, ent.rid, ent.value));
+ }
}
if (!_extraIndexEntries.empty()) {
StringBuilder ss;
ss << "Detected " << _extraIndexEntries.size() << " extra index entries.";
results->warnings.push_back(ss.str());
results->valid = false;
+ for (const auto& ent : _extraIndexEntries) {
+ results->extraIndexEntries.push_back(
+ _generateInfo(csam->indexName(), RecordId(ent.rid), ent.path, ent.rid, ent.value));
+ }
}
}
@@ -211,8 +220,48 @@ void ColumnIndexConsistency::addIndexEntryErrors(OperationContext* opCtx,
}
}
+void ColumnIndexConsistency::repairIndexEntries(OperationContext* opCtx,
+ const IndexCatalogEntry* index,
+ ValidateResults* results) {
+
+ ColumnStoreAccessMethod* csam = checked_cast<ColumnStoreAccessMethod*>(index->accessMethod());
+
+ writeConflictRetry(opCtx, "removingExtraColumnIndexEntries", _validateState->nss().ns(), [&] {
+ WriteUnitOfWork wunit(opCtx);
+ auto& indexResults = results->indexResultsMap[csam->indexName()];
+ auto cursor = csam->writableStorage()->newWriteCursor(opCtx);
+
+ for (auto it = _extraIndexEntries.begin(); it != _extraIndexEntries.end();) {
+ cursor->remove(it->path, it->rid);
+ results->numRemovedExtraIndexEntries++;
+ indexResults.keysTraversed--;
+ it = _extraIndexEntries.erase(it);
+ }
+
+ for (auto it = _missingIndexEntries.begin(); it != _missingIndexEntries.end();) {
+ cursor->insert(it->path, it->rid, it->value);
+ results->numInsertedMissingIndexEntries++;
+ indexResults.keysTraversed++;
+ it = _missingIndexEntries.erase(it);
+ }
+ wunit.commit();
+ });
+
+ results->repaired = true;
+}
+
void ColumnIndexConsistency::repairIndexEntries(OperationContext* opCtx, ValidateResults* results) {
- // TODO SERVER-71560 Repair should be implemented.
+ int numColumnStoreIndexes = 0;
+ for (const auto& index : _validateState->getIndexes()) {
+ const IndexDescriptor* descriptor = index->descriptor();
+ if (descriptor->getAccessMethodName() == IndexNames::COLUMN) {
+ ++numColumnStoreIndexes;
+ uassert(7106123,
+ "The current implementation only supports a single column-store index.",
+ numColumnStoreIndexes <= 1);
+ repairIndexEntries(opCtx, index.get(), results);
+ }
+ }
}
void ColumnIndexConsistency::validateIndexKeyCount(OperationContext* opCtx,
diff --git a/src/mongo/db/catalog/column_index_consistency.h b/src/mongo/db/catalog/column_index_consistency.h
index a503e8c7154..defe030ba98 100644
--- a/src/mongo/db/catalog/column_index_consistency.h
+++ b/src/mongo/db/catalog/column_index_consistency.h
@@ -92,7 +92,14 @@ public:
}
/**
- * If repair mode enabled, try inserting _missingIndexEntries into indexes.
+ * If repair mode enabled, tries to repair the given column-store index.
+ */
+ void repairIndexEntries(OperationContext* opCtx,
+ const IndexCatalogEntry* index,
+ ValidateResults* results);
+
+ /**
+ * If repair mode enabled, tries to repair the column-store indexes.
*/
void repairIndexEntries(OperationContext* opCtx, ValidateResults* results);
@@ -180,6 +187,8 @@ private:
const IndexCatalogEntry* index,
ValidateResults* results);
+ void _investigateSuspects(OperationContext* opCtx, const IndexCatalogEntry* index);
+
void _tabulateEntry(const FullCellView& cell, int step);
BSONObj _generateInfo(const std::string& indexName,
diff --git a/src/mongo/db/catalog/index_consistency.cpp b/src/mongo/db/catalog/index_consistency.cpp
index f16bb7ebfa3..cf8409499b6 100644
--- a/src/mongo/db/catalog/index_consistency.cpp
+++ b/src/mongo/db/catalog/index_consistency.cpp
@@ -432,6 +432,7 @@ void KeyStringIndexConsistency::addIndexKey(OperationContext* opCtx,
results->numRemovedExtraIndexEntries += numDeleted;
results->repaired = true;
indexInfo->numKeys--;
+ _extraIndexEntries.erase(key);
return;
}