Diffstat (limited to 'src/mongo')
19 files changed, 2357 insertions, 650 deletions
diff --git a/src/mongo/db/catalog/README.md b/src/mongo/db/catalog/README.md index 005345a996e..35ccea2bbb8 100644 --- a/src/mongo/db/catalog/README.md +++ b/src/mongo/db/catalog/README.md @@ -1399,7 +1399,7 @@ Additionally, users can specify that they'd like to perform a `full` validation. for the collection and each index, data throttling (for background validation), and general information about the collection. + [IndexConsistency](https://github.com/mongodb/mongo/blob/r4.5.0/src/mongo/db/catalog/index_consistency.h) - keeps track of the number of keys detected in the record store and indexes. Detects when there + descendants keep track of the number of keys detected in the record store and indexes. Detects when there are index inconsistencies and maintains the information about the inconsistencies for reporting. + [ValidateAdaptor](https://github.com/mongodb/mongo/blob/r4.5.0/src/mongo/db/catalog/validate_adaptor.h) @@ -1418,15 +1418,15 @@ Additionally, users can specify that they'd like to perform a `full` validation. + We choose a read timestamp (`ReadSource`) based on the validation mode: `kNoTimestamp` for foreground validation and `kCheckpoint` for background validation. * Traverses the `RecordStore` using the `ValidateAdaptor` object. - + [Validates each record and adds the document's index key set to the IndexConsistency object](https://github.com/mongodb/mongo/blob/r4.5.0/src/mongo/db/catalog/validate_adaptor.cpp#L61-L140) + + [Validates each record and adds the document's index key set to the IndexConsistency objects](https://github.com/mongodb/mongo/blob/r4.5.0/src/mongo/db/catalog/validate_adaptor.cpp#L61-L140) for consistency checks at later stages. - + In an effort to reduce the memory footprint of validation, the `IndexConsistency` object + + In an effort to reduce the memory footprint of validation, the `IndexConsistency` objects [hashes](https://github.com/mongodb/mongo/blob/r4.5.0/src/mongo/db/catalog/index_consistency.cpp#L307-L309) - the keys passed in to one of many buckets. - + Document keys will + the keys (or paths) passed in to one of many buckets. + + Document keys (or paths) will [increment](https://github.com/mongodb/mongo/blob/r4.5.0/src/mongo/db/catalog/index_consistency.cpp#L204-L214) the respective bucket. - + Index keys will + + Index keys (or paths) will [decrement](https://github.com/mongodb/mongo/blob/r4.5.0/src/mongo/db/catalog/index_consistency.cpp#L239-L248) the respective bucket. + Checks that the `RecordId` is in [increasing order](https://github.com/mongodb/mongo/blob/r4.5.0/src/mongo/db/catalog/validate_adaptor.cpp#L305-L308). @@ -1434,8 +1434,8 @@ Additionally, users can specify that they'd like to perform a `full` validation. stored in the `RecordStore` (when performing a foreground validation only). * Traverses the index entries for each index in the collection. + [Validates the index key order to ensure that index entries are in increasing or decreasing order](https://github.com/mongodb/mongo/blob/r4.5.0/src/mongo/db/catalog/validate_adaptor.cpp#L144-L188). - + Adds the index key to the `IndexConsistency` object for consistency checks at later stages. -* After the traversals are finished, the `IndexConsistency` object is checked to detect any + + Adds the index key to the `IndexConsistency` objects for consistency checks at later stages. +* After the traversals are finished, the `IndexConsistency` objects are checked to detect any inconsistencies between the collection and indexes. + If a bucket has a `value of 0`, then there are no inconsistencies for the keys that hashed there.
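The first phase of this scheme amounts to a multiset comparison done with counters. Below is a minimal, self-contained sketch of the idea; the class and member names are hypothetical, not the actual `IndexConsistency` API, which hashes `KeyString`s and also tracks per-bucket sizes:

```cpp
#include <cstdint>
#include <functional>
#include <string>
#include <vector>

// Each bucket nets out document-side keys (+1) against index-side keys (-1).
struct KeyCountBucket {
    int64_t indexKeyCount = 0;
};

class BucketedKeyCounts {
public:
    explicit BucketedKeyCounts(size_t numBuckets) : _buckets(numBuckets) {}

    // First phase, record store traversal: every key generated from a document
    // increments the bucket it hashes to.
    void addDocKey(const std::string& key) {
        _buckets[_bucketOf(key)].indexKeyCount++;
    }

    // First phase, index traversal: every key found in the index decrements
    // the same bucket.
    void addIndexKey(const std::string& key) {
        _buckets[_bucketOf(key)].indexKeyCount--;
    }

    // A bucket left at 0 means no detectable inconsistency for the keys that
    // hashed there; any nonzero bucket triggers the slower second phase, which
    // re-scans to pinpoint the exact missing or extra entries.
    bool haveEntryMismatch() const {
        for (const auto& bucket : _buckets) {
            if (bucket.indexKeyCount != 0) {
                return true;
            }
        }
        return false;
    }

private:
    size_t _bucketOf(const std::string& key) const {
        return std::hash<std::string>{}(key) % _buckets.size();
    }

    std::vector<KeyCountBucket> _buckets;
};
```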
diff --git a/src/mongo/db/catalog/SConscript b/src/mongo/db/catalog/SConscript index f392123499e..38a6ed37ffe 100644 --- a/src/mongo/db/catalog/SConscript +++ b/src/mongo/db/catalog/SConscript @@ -375,6 +375,7 @@ env.Library( target='catalog_impl', source=[ 'collection_impl.cpp', + 'column_index_consistency.cpp', 'database_holder_impl.cpp', 'database_impl.cpp', 'index_catalog_entry_impl.cpp', @@ -389,6 +390,7 @@ env.Library( '$BUILD_DIR/mongo/db/concurrency/lock_manager', '$BUILD_DIR/mongo/db/curop', '$BUILD_DIR/mongo/db/dbcommands_idl', + '$BUILD_DIR/mongo/db/index/column_store_index', '$BUILD_DIR/mongo/db/index/index_access_method', '$BUILD_DIR/mongo/db/index/index_access_method_factory', '$BUILD_DIR/mongo/db/index/index_access_methods', @@ -463,6 +465,7 @@ env.Library( '$BUILD_DIR/mongo/db/concurrency/exception_util', '$BUILD_DIR/mongo/db/curop', '$BUILD_DIR/mongo/db/index/index_access_method', + '$BUILD_DIR/mongo/db/index/index_access_methods', '$BUILD_DIR/mongo/db/multi_key_path_tracker', '$BUILD_DIR/mongo/db/record_id_helpers', '$BUILD_DIR/mongo/db/server_base', @@ -663,6 +666,7 @@ if wiredtiger: 'collection_validation_test.cpp', 'collection_writer_test.cpp', 'coll_mod_test.cpp', + 'column_index_consistency_test.cpp', 'commit_quorum_options_test.cpp', 'create_collection_test.cpp', 'database_test.cpp', @@ -682,6 +686,7 @@ if wiredtiger: '$BUILD_DIR/mongo/db/catalog/collection_crud', '$BUILD_DIR/mongo/db/commands/test_commands_enabled', '$BUILD_DIR/mongo/db/index/index_access_method', + '$BUILD_DIR/mongo/db/index/index_access_methods', '$BUILD_DIR/mongo/db/index_builds_coordinator_mongod', '$BUILD_DIR/mongo/db/multitenancy', '$BUILD_DIR/mongo/db/op_observer/op_observer', diff --git a/src/mongo/db/catalog/collection_validation.cpp b/src/mongo/db/catalog/collection_validation.cpp index 2c6661a4f5c..0f07a819980 100644 --- a/src/mongo/db/catalog/collection_validation.cpp +++ b/src/mongo/db/catalog/collection_validation.cpp @@ -174,11 +174,10 @@ void _validateIndexes(OperationContext* opCtx, */ void _gatherIndexEntryErrors(OperationContext* opCtx, ValidateState* validateState, - IndexConsistency* indexConsistency, ValidateAdaptor* indexValidator, ValidateResults* result) { - indexConsistency->setSecondPhase(); - if (!indexConsistency->limitMemoryUsageForSecondPhase(result)) { + indexValidator->setSecondPhase(); + if (!indexValidator->limitMemoryUsageForSecondPhase(result)) { return; } @@ -223,12 +222,12 @@ void _gatherIndexEntryErrors(OperationContext* opCtx, } if (validateState->fixErrors()) { - indexConsistency->repairMissingIndexEntries(opCtx, result); + indexValidator->repairIndexEntries(opCtx, result); } LOGV2_OPTIONS(20301, {LogComponent::kIndex}, "Finished traversing through all the indexes"); - indexConsistency->addIndexEntryErrors(result); + indexValidator->addIndexEntryErrors(opCtx, result); } void _validateIndexKeyCount(OperationContext* opCtx, @@ -556,8 +555,7 @@ Status validate(OperationContext* opCtx, logAttrs(validateState.nss()), logAttrs(validateState.uuid())); - IndexConsistency indexConsistency(opCtx, &validateState); - ValidateAdaptor indexValidator(&indexConsistency, &validateState); + ValidateAdaptor indexValidator(opCtx, &validateState); // In traverseRecordStore(), the index validator keeps track of the records in the record // store so that _validateIndexes() can confirm that the index entries match the records in @@ -569,10 +567,10 @@ Status validate(OperationContext*
opCtx, // Pause collection validation while a lock is held and between collection and index data // validation. // - // The IndexConsistency object saves document key information during collection data - // validation and then compares against that key information during index data validation. - // This fail point is placed in between them, in an attempt to catch any inconsistencies - // that concurrent CRUD ops might cause if we were to have a bug. + // The KeyStringIndexConsistency object saves document key information during collection + // data validation and then compares against that key information during index data + // validation. This fail point is placed in between them, in an attempt to catch any + // inconsistencies that concurrent CRUD ops might cause if we were to have a bug. // // Only useful for background validation because we hold an intent lock instead of an // exclusive lock, and thus allow concurrent operations. @@ -592,14 +590,13 @@ Status validate(OperationContext* opCtx, // Validate indexes and check for mismatches. _validateIndexes(opCtx, &validateState, &indexValidator, results); - if (indexConsistency.haveEntryMismatch()) { + if (indexValidator.haveEntryMismatch()) { LOGV2_OPTIONS(20305, {LogComponent::kIndex}, "Index inconsistencies were detected. " "Starting the second phase of index validation to gather concise errors", "namespace"_attr = validateState.nss()); - _gatherIndexEntryErrors( - opCtx, &validateState, &indexConsistency, &indexValidator, results); + _gatherIndexEntryErrors(opCtx, &validateState, &indexValidator, results); } if (!results->valid) { diff --git a/src/mongo/db/catalog/collection_validation_test.cpp b/src/mongo/db/catalog/collection_validation_test.cpp index 15f8f475aec..19cfe013d13 100644 --- a/src/mongo/db/catalog/collection_validation_test.cpp +++ b/src/mongo/db/catalog/collection_validation_test.cpp @@ -27,13 +27,18 @@ * it in the license file. */ +#include <string> + #include "mongo/bson/util/builder.h" #include "mongo/db/catalog/catalog_test_fixture.h" #include "mongo/db/catalog/collection_validation.h" #include "mongo/db/catalog/collection_write_path.h" +#include "mongo/db/catalog/column_index_consistency.h" #include "mongo/db/db_raii.h" +#include "mongo/db/index/column_key_generator.h" #include "mongo/db/index/index_access_method.h" #include "mongo/db/operation_context.h" +#include "mongo/idl/server_parameter_test_util.h" #include "mongo/stdx/thread.h" #include "mongo/unittest/unittest.h" #include "mongo/util/bufreader.h" @@ -53,7 +58,7 @@ private: CatalogTestFixture::setUp(); // Create collection kNss for unit tests to use. It will possess a default _id index. - CollectionOptions defaultCollectionOptions; + const CollectionOptions defaultCollectionOptions; ASSERT_OK(storageInterface()->createCollection( operationContext(), kNss, defaultCollectionOptions)); }; @@ -66,12 +71,13 @@ protected: }; /** - * Calls validate on collection kNss with both kValidateFull and kValidateNormal validation levels + * Calls validate on collection nss with both kValidateFull and kValidateNormal validation levels * and verifies the results. * - * Returns list of validate results. + * Returns the list of validation results. 
*/ std::vector<std::pair<BSONObj, ValidateResults>> foregroundValidate( + const NamespaceString& nss, OperationContext* opCtx, bool valid, int numRecords, @@ -86,7 +92,7 @@ std::vector<std::pair<BSONObj, ValidateResults>> foregroundValidate( ValidateResults validateResults; BSONObjBuilder output; ASSERT_OK(CollectionValidation::validate( - opCtx, kNss, mode, repairMode, &validateResults, &output)); + opCtx, nss, mode, repairMode, &validateResults, &output)); BSONObj obj = output.obj(); BSONObjBuilder validateResultsBuilder; validateResults.appendToResultObj(&validateResultsBuilder, true /* debugging */); @@ -127,17 +133,21 @@ ValidateResults omitTransientWarnings(const ValidateResults& results) { } /** - * Calls validate on collection kNss with {background:true} and verifies the results. + * Calls validate on collection nss with {background:true} and verifies the results. * If 'runForegroundAsWell' is set, then foregroundValidate() above will be run in addition. + * + * Returns the list of validation results. */ -void backgroundValidate(OperationContext* opCtx, - bool valid, - int numRecords, - int numInvalidDocuments, - int numErrors, - bool runForegroundAsWell) { +std::vector<std::pair<BSONObj, ValidateResults>> backgroundValidate(const NamespaceString& nss, + OperationContext* opCtx, + bool valid, + int numRecords, + int numInvalidDocuments, + int numErrors, + bool runForegroundAsWell) { + std::vector<std::pair<BSONObj, ValidateResults>> res; if (runForegroundAsWell) { - foregroundValidate(opCtx, valid, numRecords, numInvalidDocuments, numErrors); + res = foregroundValidate(nss, opCtx, valid, numRecords, numInvalidDocuments, numErrors); } // This function will force a checkpoint, so background validation can then read from that @@ -149,7 +159,7 @@ void backgroundValidate(OperationContext* opCtx, ValidateResults validateResults; BSONObjBuilder output; ASSERT_OK(CollectionValidation::validate(opCtx, - kNss, + nss, CollectionValidation::ValidateMode::kBackground, CollectionValidation::RepairMode::kNone, &validateResults, @@ -161,24 +171,32 @@ void backgroundValidate(OperationContext* opCtx, ASSERT_EQ(obj.getIntField("nrecords"), numRecords); ASSERT_EQ(obj.getIntField("nInvalidDocuments"), numInvalidDocuments); + + res.push_back({std::make_pair(obj, validateResults)}); + + return res; } /** - * Inserts a range of documents into the kNss collection and then returns that count. - * The range is defined by [startIDNum, endIDNum), not inclusive of endIDNum, using the numbers as - * values for '_id' of the document being inserted. + * Inserts a range of documents into the nss collection and then returns that count. The range is + * defined by [startIDNum, startIDNum+numDocs), not inclusive of (startIDNum+numDocs), using the + * numbers as values for '_id' of each inserted document, followed by 'numFields' additional fields.
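 + * + * For example (an illustrative worked case, following the field-value formula in the loop below): with startIDNum=1, numDocs=2 and numFields=2, this inserts the documents {_id: 1, a1: 2, a2: 3} and {_id: 2, a1: 5, a2: 6}.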
*/ -int insertDataRange(OperationContext* opCtx, int startIDNum, int endIDNum) { - invariant(startIDNum < endIDNum, - str::stream() << "attempted to insert invalid data range from " << startIDNum - << " to " << endIDNum); - - - AutoGetCollection coll(opCtx, kNss, MODE_IX); +int insertDataRangeForNumFields(const NamespaceString& nss, + OperationContext* opCtx, + const int startIDNum, + const int numDocs, + const int numFields) { + const AutoGetCollection coll(opCtx, nss, MODE_IX); std::vector<InsertStatement> inserts; - for (int i = startIDNum; i < endIDNum; ++i) { - auto doc = BSON("_id" << i); - inserts.push_back(InsertStatement(doc)); + for (int i = 0; i < numDocs; ++i) { + BSONObjBuilder bsonBuilder; + bsonBuilder << "_id" << i + startIDNum; + for (int c = 1; c <= numFields; ++c) { + bsonBuilder << "a" + std::to_string(c) << i + (i * numFields + startIDNum) + c; + } + const auto obj = bsonBuilder.obj(); + inserts.push_back(InsertStatement(obj)); } { @@ -187,7 +205,20 @@ int insertDataRange(OperationContext* opCtx, int startIDNum, int endIDNum) { opCtx, *coll, inserts.begin(), inserts.end(), nullptr, false)); wuow.commit(); } - return endIDNum - startIDNum; + return numDocs; +} + +/** + * Inserts a range of documents into the kNss collection and then returns that count. The range is + * defined by [startIDNum, endIDNum), not inclusive of endIDNum, using the numbers as values for + * '_id' of the document being inserted. + */ +int insertDataRange(OperationContext* opCtx, const int startIDNum, const int endIDNum) { + invariant(startIDNum < endIDNum, + str::stream() << "attempted to insert invalid data range from " << startIDNum + << " to " << endIDNum); + + return insertDataRangeForNumFields(kNss, opCtx, startIDNum, endIDNum - startIDNum, 0); } /** @@ -209,17 +240,301 @@ int setUpInvalidData(OperationContext* opCtx) { return 1; } +class CollectionValidationColumnStoreIndexTest : public CollectionValidationDiskTest { +protected: + CollectionValidationColumnStoreIndexTest() : CollectionValidationDiskTest() {} + + const BSONObj kColumnStoreSpec = BSON("name" + << "$**_columnstore" + << "key" + << BSON("$**" + << "columnstore") + << "v" << 2); + + /** + * This method decorates the execution of column-store index validation tests. It works by + * 1) creating an 'nss' namespace with a column-store index, 2) inserting documents into this + * namespace (via calling 'insertDocsFn'), 3) running validation on this collection and making + * sure it's valid, 4) running 'modifyIndexContentsFn', and 5) running 'postCheckFn', which + * usually contains an index validation call with relevant assertions. + * + * Returns the list of validation results. + */ + std::vector<std::pair<BSONObj, ValidateResults>> runColumnStoreIndexTest( + const NamespaceString& nss, + std::function<int(OperationContext*, repl::StorageInterface*)> insertDocsFn, + std::function<void(OperationContext*, ColumnStore*, int)> modifyIndexContentsFn, + std::function<std::vector<std::pair<BSONObj, ValidateResults>>(OperationContext*, int)> + postCheckFn) { + + RAIIServerParameterControllerForTest controller("featureFlagColumnstoreIndexes", true); + + auto opCtx = operationContext(); + + { + // Durable catalog expects metadata updates to be timestamped but this is + // not necessary in our case - we just want to check the contents of the index table. + // The alternative here would be to provide a commit timestamp with a TimestampBlock.
+ repl::UnreplicatedWritesBlock uwb(opCtx); + + ASSERT_OK( + storageInterface()->createIndexesOnEmptyCollection(opCtx, nss, {kColumnStoreSpec})); + } + + const auto numRecords = insertDocsFn(opCtx, storageInterface()); + + // Validate the collection here as a sanity check before we modify the index contents + // in-place. + foregroundValidate(nss, + opCtx, + /*valid*/ true, + /*numRecords*/ numRecords, + /*numInvalidDocuments*/ 0, + /*numErrors*/ 0); + + { + AutoGetCollection autoColl(opCtx, nss, MODE_IX); + + const auto indexCatalog = autoColl->getIndexCatalog(); + const auto descriptor = indexCatalog->findIndexByName(opCtx, "$**_columnstore"); + ASSERT(descriptor) << "Cannot find $**_columnstore in index catalog"; + const auto entry = indexCatalog->getEntry(descriptor); + ASSERT(entry) << "Cannot look up index catalog entry for index $**_columnstore"; + + const auto columnStore = + dynamic_cast<ColumnStoreAccessMethod*>(entry->accessMethod())->writableStorage(); + ASSERT_FALSE(columnStore->isEmpty(opCtx)) + << "index $**_columnstore should not be empty"; + + modifyIndexContentsFn(opCtx, columnStore, numRecords); + } + + return postCheckFn(opCtx, numRecords); + } + + /** + * Represents a fault where an index entry is deleted. + */ + struct DeletionFault {}; + + /** + * Represents a fault where an index entry is additionally inserted. + */ + struct InsertionFault { + // This value is inserted in the target index cell. + std::string insertedIndexValue; + + InsertionFault(std::string iVal = "WRONG_KEY") : insertedIndexValue(iVal) {} + }; + + /** + * Represents a fault where the value of an index entry is replaced with a wrong value. + */ + struct ReplacementFault { + // The actual index value is replaced with this given value in the target index cell. + std::string updatedIndexValue; + + ReplacementFault(std::string uVal = "WRONG_KEY") : updatedIndexValue(uVal) {} + }; + + /** + * Represents an index corruption in the field represented by 'fieldIndex' for a document whose + * id equals 'docIndex'. The actual fault can be one of the 'DeletionFault', 'InsertionFault', + * or 'ReplacementFault' options. + */ + struct InjectedCorruption { + int fieldIndex; + int docIndex; + std::variant<DeletionFault, InsertionFault, ReplacementFault> fault; + + InjectedCorruption(int fieldIdx, + int docIdx, + std::variant<DeletionFault, InsertionFault, ReplacementFault> f) + : fieldIndex(fieldIdx), docIndex(docIdx), fault(f) {} + + + /** + * Returns a field index for a field that doesn't exist. As field indexes are assumed to be + * non-negative, it's guaranteed that a negative field index does not exist. 'fldId' is used + * to produce more specific negative numbers for the non-existing field index to help with + * pinpointing the test failures. + */ + static int getNonExistentFieldIndex(const int fldId) { + return -fldId; + } + }; + + int docIndexToRowId(const int docIndex) { + return docIndex + 1L; + } + + /** + * This method runs a column-store index test on 'nss' via a call to 'runColumnStoreIndexTest'. + * First, it populates an 'nss' collection (with a column-store index defined on it) with + * 'numDocs' documents, where each document has 'numFields' fields. Then it applies a series of + * 'corruptions' to the column-store index. Finally, index validation is run on this + * collection (which now has a corrupted column-store index) and the validation results are + * returned. + * + * Note: passing 'doBackgroundValidation = true' performs both foreground and background + * validations.
However, this can only be done in unit-tests that have a single call to + * this method. + * + * Returns the list of validation results. + */ + std::vector<std::pair<BSONObj, ValidateResults>> validateIndexCorruptions( + const NamespaceString& nss, + const int numFields, + const int numDocs, + const std::vector<InjectedCorruption> corruptions, + const bool doBackgroundValidation = false) { + return runColumnStoreIndexTest( + nss, + /* insertDocsFn */ + [&](OperationContext* opCtx, repl::StorageInterface* storageInterface) -> int { + return insertDataRangeForNumFields( + nss, opCtx, /*startIDNum*/ 1, /*numDocs*/ numDocs, /*numFields*/ numFields); + }, + // modifyIndexContentsFn: For each corruption specified, introduces the corruption and + // then in a separate transaction ensures the corruption is now present. + [&](OperationContext* opCtx, ColumnStore* columnStore, int numRecords) -> void { + const auto getPath = [](int corruptedFldIndex) -> std::string { + return "a" + std::to_string(corruptedFldIndex); + }; + const auto seekToCorruptedIndexEntry = + [&](int corruptedFldIndex, + int corruptedDocIndex) -> boost::optional<FullCellValue> { + auto cursor = columnStore->newCursor(opCtx, getPath(corruptedFldIndex)); + ASSERT(cursor); + const auto res = + cursor->seekAtOrPast(RowId(docIndexToRowId(corruptedDocIndex))); + return res ? FullCellValue(res.value()) : boost::optional<FullCellValue>(); + }; + + for (const auto& corruption : corruptions) { + const int corruptedFldIndex = corruption.fieldIndex; + const int corruptedDocIndex = corruption.docIndex; + + const auto preCorruptionCell = + seekToCorruptedIndexEntry(corruptedFldIndex, corruptedDocIndex); + // Apply the requested corruption in a transaction. + { + WriteUnitOfWork wuow(opCtx); + const auto cursor = columnStore->newWriteCursor(opCtx); + if (std::holds_alternative<ReplacementFault>(corruption.fault)) { + const auto toVal = + std::get<ReplacementFault>(corruption.fault).updatedIndexValue; + columnStore->update(opCtx, + getPath(corruptedFldIndex), + preCorruptionCell->rid, + StringData(toVal)); + } else if (std::holds_alternative<DeletionFault>(corruption.fault)) { + columnStore->remove( + opCtx, getPath(corruptedFldIndex), preCorruptionCell->rid); + } else if (std::holds_alternative<InsertionFault>(corruption.fault)) { + const auto toVal = + std::get<InsertionFault>(corruption.fault).insertedIndexValue; + columnStore->insert(opCtx, + getPath(corruptedFldIndex), + RowId(docIndexToRowId(corruptedDocIndex)), + StringData(toVal)); + } else { + MONGO_UNREACHABLE; + } + wuow.commit(); + } + + // Confirm the requested corruption is actually applied (in a separate + // transaction). 
+ { + + if (std::holds_alternative<ReplacementFault>(corruption.fault)) { + const auto toVal = + std::get<ReplacementFault>(corruption.fault).updatedIndexValue; + const auto corruptedCell = + seekToCorruptedIndexEntry(corruptedFldIndex, corruptedDocIndex); + ASSERT_EQ(corruptedCell->path, getPath(corruptedFldIndex)); + ASSERT_EQ(corruptedCell->rid, preCorruptionCell->rid); + ASSERT_EQ(corruptedCell->value, StringData(toVal)); + } else if (std::holds_alternative<DeletionFault>(corruption.fault)) { + const auto corruptedCell = + seekToCorruptedIndexEntry(corruptedFldIndex, corruptedDocIndex); + if (numDocs == 1 || corruptedDocIndex == numDocs - 1) { + ASSERT_FALSE(corruptedCell); + } else { + ASSERT_EQ(corruptedCell->path, getPath(corruptedFldIndex)); + ASSERT_GT(corruptedCell->rid, preCorruptionCell->rid); + } + } else if (std::holds_alternative<InsertionFault>(corruption.fault)) { + const auto toVal = + std::get<InsertionFault>(corruption.fault).insertedIndexValue; + const auto corruptedCell = + seekToCorruptedIndexEntry(corruptedFldIndex, corruptedDocIndex); + ASSERT_EQ(corruptedCell->path, getPath(corruptedFldIndex)); + ASSERT_EQ(corruptedCell->rid, + RowId(docIndexToRowId(corruptedDocIndex))); + ASSERT_EQ(corruptedCell->value, StringData(toVal)); + } else { + MONGO_UNREACHABLE; + } + } + } + }, + /* postCheckFn */ + [&](OperationContext* opCtx, + int numRecords) -> std::vector<std::pair<BSONObj, ValidateResults>> { + auto serviceContext = opCtx->getServiceContext(); + + // Confirm there is an expected validation error + std::vector<std::pair<BSONObj, ValidateResults>> results; + + if (doBackgroundValidation) { + // Background validation must be done in a separate thread due to the + // assumptions made in its implementation. + stdx::thread runBackgroundValidate = + stdx::thread([&serviceContext, &numRecords, &nss, &results] { + ThreadClient tc("BackgroundValidate-thread", serviceContext); + auto threadOpCtx = tc->makeOperationContext(); + auto bgResults = backgroundValidate(nss, + threadOpCtx.get(), + /*valid*/ false, + /*numRecords*/ numRecords, + /*numInvalidDocuments*/ 0, + /*numErrors*/ 1, + /*runForegroundAsWell*/ false); + results.insert(results.end(), bgResults.begin(), bgResults.end()); + }); + // Make sure the background validation finishes successfully. + runBackgroundValidate.join(); + } + + const auto fgResults = foregroundValidate(nss, + opCtx, + /*valid*/ false, + /*numRecords*/ numRecords, + /*numInvalidDocuments*/ 0, + /*numErrors*/ 1); + + results.insert(results.end(), fgResults.begin(), fgResults.end()); + + return results; + }); + } +}; + // Verify that calling validate() on an empty collection with different validation levels returns an // OK status. TEST_F(CollectionValidationTest, ValidateEmpty) { - foregroundValidate(operationContext(), + foregroundValidate(kNss, + operationContext(), /*valid*/ true, /*numRecords*/ 0, /*numInvalidDocuments*/ 0, /*numErrors*/ 0); } TEST_F(CollectionValidationDiskTest, BackgroundValidateEmpty) { - backgroundValidate(operationContext(), + backgroundValidate(kNss, + operationContext(), /*valid*/ true, /*numRecords*/ 0, /*numInvalidDocuments*/ 0, @@ -230,7 +545,8 @@ TEST_F(CollectionValidationDiskTest, BackgroundValidateEmpty) { // Verify calling validate() on a nonempty collection with different validation levels. 
TEST_F(CollectionValidationTest, Validate) { auto opCtx = operationContext(); - foregroundValidate(opCtx, + foregroundValidate(kNss, + opCtx, /*valid*/ true, /*numRecords*/ insertDataRange(opCtx, 0, 5), /*numInvalidDocuments*/ 0, @@ -238,7 +554,8 @@ TEST_F(CollectionValidationTest, Validate) { } TEST_F(CollectionValidationDiskTest, BackgroundValidate) { auto opCtx = operationContext(); - backgroundValidate(opCtx, + backgroundValidate(kNss, + opCtx, /*valid*/ true, /*numRecords*/ insertDataRange(opCtx, 0, 5), /*numInvalidDocuments*/ 0, @@ -249,7 +566,8 @@ TEST_F(CollectionValidationDiskTest, BackgroundValidate) { // Verify calling validate() on a collection with an invalid document. TEST_F(CollectionValidationTest, ValidateError) { auto opCtx = operationContext(); - foregroundValidate(opCtx, + foregroundValidate(kNss, + opCtx, /*valid*/ false, /*numRecords*/ setUpInvalidData(opCtx), /*numInvalidDocuments*/ 1, @@ -257,7 +575,8 @@ TEST_F(CollectionValidationTest, ValidateError) { } TEST_F(CollectionValidationDiskTest, BackgroundValidateError) { auto opCtx = operationContext(); - backgroundValidate(opCtx, + backgroundValidate(kNss, + opCtx, /*valid*/ false, /*numRecords*/ setUpInvalidData(opCtx), /*numInvalidDocuments*/ 1, @@ -268,7 +587,8 @@ TEST_F(CollectionValidationDiskTest, BackgroundValidateError) { // Verify calling validate() with enforceFastCount=true. TEST_F(CollectionValidationTest, ValidateEnforceFastCount) { auto opCtx = operationContext(); - foregroundValidate(opCtx, + foregroundValidate(kNss, + opCtx, /*valid*/ true, /*numRecords*/ insertDataRange(opCtx, 0, 5), /*numInvalidDocuments*/ 0, @@ -306,7 +626,7 @@ TEST_F(CollectionValidationDiskTest, BackgroundValidateRunsConcurrentlyWithWrite ThreadClient tc("BackgroundValidateConcurrentWithCRUD-thread", serviceContext); auto threadOpCtx = tc->makeOperationContext(); backgroundValidate( - threadOpCtx.get(), true, numRecords, 0, 0, /*runForegroundAsWell*/ false); + kNss, threadOpCtx.get(), true, numRecords, 0, 0, /*runForegroundAsWell*/ false); }); // Wait until validate starts and hangs mid-way on a failpoint, then do concurrent writes, @@ -319,7 +639,8 @@ TEST_F(CollectionValidationDiskTest, BackgroundValidateRunsConcurrentlyWithWrite runBackgroundValidate.join(); // Run regular foreground collection validation to make sure everything is OK. - foregroundValidate(opCtx, + foregroundValidate(kNss, + opCtx, /*valid*/ true, /*numRecords*/ numRecords + numRecords2, 0, @@ -376,7 +697,7 @@ TEST_F(CollectionValidationTest, ValidateOldUniqueIndexKeyWarning) { // Validate the collection here as a sanity check before we modify the index contents in-place. foregroundValidate( - opCtx, /*valid*/ true, /*numRecords*/ 1, /*numInvalidDocuments*/ 0, /*numErrors*/ 0); + kNss, opCtx, /*valid*/ true, /*numRecords*/ 1, /*numInvalidDocuments*/ 0, /*numErrors*/ 0); // Update existing entry in index to pre-4.2 format without record id in key string. 
{ @@ -429,21 +750,22 @@ TEST_F(CollectionValidationTest, ValidateOldUniqueIndexKeyWarning) { } } - auto results = foregroundValidate(opCtx, - /*valid*/ true, - /*numRecords*/ 1, - /*numInvalidDocuments*/ 0, - /*numErrors*/ 0); + const auto results = foregroundValidate(kNss, + opCtx, + /*valid*/ true, + /*numRecords*/ 1, + /*numInvalidDocuments*/ 0, + /*numErrors*/ 0); ASSERT_EQ(results.size(), 2); for (const auto& result : results) { const auto& validateResults = result.second; BSONObjBuilder builder; - bool debugging = true; + const bool debugging = true; validateResults.appendToResultObj(&builder, debugging); - auto obj = builder.obj(); + const auto obj = builder.obj(); ASSERT(validateResults.valid) << obj; - auto warningsWithoutTransientErrors = omitTransientWarnings(validateResults); + const auto warningsWithoutTransientErrors = omitTransientWarnings(validateResults); ASSERT_EQ(warningsWithoutTransientErrors.warnings.size(), 1U) << obj; ASSERT_STRING_CONTAINS(warningsWithoutTransientErrors.warnings[0], "Unique index a_1 has one or more keys in the old format") @@ -451,5 +773,353 @@ TEST_F(CollectionValidationTest, ValidateOldUniqueIndexKeyWarning) { } } +/** + * Checks whether a given 'entry' is equal to any element in the 'list'. + */ +bool equalsAny(const std::string& entry, const std::vector<std::string>& list) { + return std::any_of( + list.begin(), list.end(), [&entry](const std::string& other) { return entry == other; }); +} + +// Exhaustively tests having one error in the column-store index by updating one index entry with an +// invalid value in different parts of the index on collections with different numbers of columns and +// documents. +TEST_F(CollectionValidationColumnStoreIndexTest, SingleInvalidIndexEntryCSI) { + const int kNumFields = 4; + const int kMaxNumDocs = 4; + + int testCaseIdx = 0; + for (int numFields = 1; numFields <= kNumFields; ++numFields) { + for (int numDocs = 1; numDocs <= kMaxNumDocs; ++numDocs) { + for (int corruptedFldIndex = 1; corruptedFldIndex <= numFields; ++corruptedFldIndex) { + for (int corruptedDocIndex = 0; corruptedDocIndex < numDocs; ++corruptedDocIndex) { + const NamespaceString nss(kNss.toString() + std::to_string(++testCaseIdx)); + + // Create collection nss for unit tests to use. + const CollectionOptions defaultCollectionOptions; + ASSERT_OK(storageInterface()->createCollection( + operationContext(), nss, defaultCollectionOptions)); + + const auto results = validateIndexCorruptions( + nss, + numFields, + numDocs, + /* column-store index corruptions */ + {{corruptedFldIndex, + corruptedDocIndex, + /* Update the current index entry with an invalid value.
*/ + ReplacementFault("WRONG_" + std::to_string(corruptedFldIndex) + "_" + + std::to_string(corruptedDocIndex))}}, + /* doBackgroundValidation */ true); + + ASSERT_EQ(results.size(), 3); + + for (const auto& result : results) { + const auto& validateResults = result.second; + BSONObjBuilder builder; + const bool debugging = true; + validateResults.appendToResultObj(&builder, debugging); + const auto obj = builder.obj(); + ASSERT_FALSE(validateResults.valid) << obj; + + const auto warningsWithoutTransientErrors = + omitTransientWarnings(validateResults); + ASSERT_EQ(warningsWithoutTransientErrors.warnings.size(), 2U) << obj; + ASSERT(equalsAny("Detected 1 missing index entries.", + warningsWithoutTransientErrors.warnings)) + << obj; + ASSERT(equalsAny("Detected 1 extra index entries.", + warningsWithoutTransientErrors.warnings)) + << obj; + + ASSERT_EQ(validateResults.errors.size(), 1U) << obj; + ASSERT_EQ(validateResults.errors[0], + "Index with name '$**_columnstore' has inconsistencies.") + << obj; + + const auto& extraEntries = validateResults.extraIndexEntries; + ASSERT_EQ(extraEntries.size(), 1U) << obj; + ASSERT_EQ(extraEntries[0]["indexName"].String(), "$**_columnstore") << obj; + ASSERT_EQ(extraEntries[0]["recordId"].Long(), + docIndexToRowId(corruptedDocIndex)) + << obj; + ASSERT_EQ(extraEntries[0]["rowId"].Long(), + docIndexToRowId(corruptedDocIndex)) + << obj; + ASSERT_EQ(extraEntries[0]["indexPath"].String(), + "a" + std::to_string(corruptedFldIndex)) + << obj; + + const auto& missingEntries = validateResults.missingIndexEntries; + ASSERT_EQ(missingEntries.size(), 1U) << obj; + ASSERT_EQ(missingEntries[0]["indexName"].String(), "$**_columnstore") + << obj; + ASSERT_EQ(missingEntries[0]["recordId"].Long(), + docIndexToRowId(corruptedDocIndex)) + << obj; + ASSERT_EQ(missingEntries[0]["rowId"].Long(), + docIndexToRowId(corruptedDocIndex)) + << obj; + ASSERT_EQ(missingEntries[0]["indexPath"].String(), + "a" + std::to_string(corruptedFldIndex)) + << obj; + + ASSERT_EQ(validateResults.corruptRecords.size(), 0U) << obj; + ASSERT_EQ(validateResults.numRemovedCorruptRecords, 0U) << obj; + ASSERT_EQ(validateResults.numRemovedExtraIndexEntries, 0U) << obj; + ASSERT_EQ(validateResults.numInsertedMissingIndexEntries, 0U) << obj; + ASSERT_EQ(validateResults.numDocumentsMovedToLostAndFound, 0U) << obj; + ASSERT_EQ(validateResults.numOutdatedMissingIndexEntry, 0U) << obj; + } + } + } + } + } +} + +// Exhaustively tests having one error in the column-store index by adding an extra index entry on +// collections with different numbers of columns and documents.
+TEST_F(CollectionValidationColumnStoreIndexTest, SingleExtraIndexEntry) { + const int kNumFields = 4; + const int kMaxNumDocs = 4; + + std::vector<std::pair<int, int>> extraIndexEntryCorruptions = { + /* non-existent field on an existing document */ + {/*corruptedFldIndex*/ InjectedCorruption::getNonExistentFieldIndex(1), + /*corruptedDocIndex*/ 0}, + /* non-existent field on a non-existent document */ + {/*corruptedFldIndex*/ InjectedCorruption::getNonExistentFieldIndex(2), + /*corruptedDocIndex*/ kMaxNumDocs * 10}}; + for (int corruptedFldIndex = 1; corruptedFldIndex <= kNumFields; ++corruptedFldIndex) { + /* existing field on a non-existent document */ + extraIndexEntryCorruptions.push_back( + {corruptedFldIndex, /*corruptedDocIndex*/ kMaxNumDocs * 10}); + } + + int testCaseIdx = 0; + for (int numFields = 1; numFields <= kNumFields; ++numFields) { + for (int numDocs = 1; numDocs <= kMaxNumDocs; ++numDocs) { + for (const auto& corruption : extraIndexEntryCorruptions) { + const int corruptedFldIndex = corruption.first; + const int corruptedDocIndex = corruption.second; + + const auto nss = NamespaceString(kNss.toString() + std::to_string(++testCaseIdx)); + + // Create collection nss for unit tests to use. + const CollectionOptions defaultCollectionOptions; + ASSERT_OK(storageInterface()->createCollection( + operationContext(), nss, defaultCollectionOptions)); + + const auto results = validateIndexCorruptions( + nss, + numFields, + numDocs, + /* column-store index corruptions */ + {{corruptedFldIndex, + corruptedDocIndex, + /* Insert an extra index entry. */ + InsertionFault("WRONG_" + std::to_string(corruptedFldIndex) + "_" + + std::to_string(corruptedDocIndex))}}, + /* doBackgroundValidation */ true); + + ASSERT_EQ(results.size(), 3); + + for (const auto& result : results) { + const auto& validateResults = result.second; + BSONObjBuilder builder; + const bool debugging = true; + validateResults.appendToResultObj(&builder, debugging); + const auto obj = builder.obj(); + ASSERT_FALSE(validateResults.valid) << obj; + + const auto warningsWithoutTransientErrors = + omitTransientWarnings(validateResults); + ASSERT_EQ(warningsWithoutTransientErrors.warnings.size(), 1U) << obj; + ASSERT(fmt::format("Detected {} extra index entries.", 1) == + warningsWithoutTransientErrors.warnings[0]) + << obj; + + ASSERT_EQ(validateResults.errors.size(), 1U) << obj; + ASSERT_EQ(validateResults.errors[0], + "Index with name '$**_columnstore' has inconsistencies.") + << obj; + + const auto& extraEntries = validateResults.extraIndexEntries; + ASSERT_EQ(extraEntries.size(), 1) << obj; + ASSERT_EQ(extraEntries[0]["indexName"].String(), "$**_columnstore") << obj; + ASSERT_EQ(extraEntries[0]["recordId"].Long(), + docIndexToRowId(corruptedDocIndex)) + << obj; + ASSERT_EQ(extraEntries[0]["rowId"].Long(), docIndexToRowId(corruptedDocIndex)) + << obj; + ASSERT_EQ(extraEntries[0]["indexPath"].String(), + "a" + std::to_string(corruptedFldIndex)) + << obj; + + ASSERT_EQ(validateResults.missingIndexEntries.size(), 0U) << obj; + ASSERT_EQ(validateResults.corruptRecords.size(), 0U) << obj; + ASSERT_EQ(validateResults.numRemovedCorruptRecords, 0U) << obj; + ASSERT_EQ(validateResults.numRemovedExtraIndexEntries, 0U) << obj; + ASSERT_EQ(validateResults.numInsertedMissingIndexEntries, 0U) << obj; + ASSERT_EQ(validateResults.numDocumentsMovedToLostAndFound, 0U) << obj; + ASSERT_EQ(validateResults.numOutdatedMissingIndexEntry, 0U) << obj; + } + } + } + } +} + +// Exhaustively tests having one error in the column-store index by 
removing one index entry from // different parts of the index on collections with different numbers of columns and documents. TEST_F(CollectionValidationColumnStoreIndexTest, SingleMissingIndexEntryCSI) { + const int kNumFields = 4; + const int kMaxNumDocs = 4; + + int testCaseIdx = 0; + for (int numFields = 1; numFields <= kNumFields; ++numFields) { + for (int numDocs = 1; numDocs <= kMaxNumDocs; ++numDocs) { + for (int corruptedFldIndex = 1; corruptedFldIndex <= numFields; ++corruptedFldIndex) { + for (int corruptedDocIndex = 0; corruptedDocIndex < numDocs; ++corruptedDocIndex) { + const NamespaceString nss(kNss.toString() + std::to_string(++testCaseIdx)); + + // Create collection nss for unit tests to use. + const CollectionOptions defaultCollectionOptions; + ASSERT_OK(storageInterface()->createCollection( + operationContext(), nss, defaultCollectionOptions)); + + const auto results = + validateIndexCorruptions(nss, + numFields, + numDocs, + /* column-store index corruptions */ + {{corruptedFldIndex, + corruptedDocIndex, + /* Remove the existing index entry. */ + DeletionFault()}}, + /* doBackgroundValidation */ true); + + ASSERT_EQ(results.size(), 3); + + for (const auto& result : results) { + const auto& validateResults = result.second; + BSONObjBuilder builder; + const bool debugging = true; + validateResults.appendToResultObj(&builder, debugging); + const auto obj = builder.obj(); + ASSERT_FALSE(validateResults.valid) << obj; + + const auto warningsWithoutTransientErrors = + omitTransientWarnings(validateResults); + ASSERT_EQ(warningsWithoutTransientErrors.warnings.size(), 1U) << obj; + ASSERT("Detected 1 missing index entries." == + warningsWithoutTransientErrors.warnings[0]) + << obj; + + ASSERT_EQ(validateResults.errors.size(), 1U) << obj; + ASSERT_EQ(validateResults.errors[0], + "Index with name '$**_columnstore' has inconsistencies.") + << obj; + + const auto& missingEntries = validateResults.missingIndexEntries; + ASSERT_EQ(missingEntries.size(), 1U) << obj; + ASSERT_EQ(missingEntries[0]["indexName"].String(), "$**_columnstore") + << obj; + ASSERT_EQ(missingEntries[0]["recordId"].Long(), + docIndexToRowId(corruptedDocIndex)) + << obj; + ASSERT_EQ(missingEntries[0]["rowId"].Long(), + docIndexToRowId(corruptedDocIndex)) + << obj; + ASSERT_EQ(missingEntries[0]["indexPath"].String(), + "a" + std::to_string(corruptedFldIndex)) + << obj; + + ASSERT_EQ(validateResults.extraIndexEntries.size(), 0U) << obj; + ASSERT_EQ(validateResults.corruptRecords.size(), 0U) << obj; + ASSERT_EQ(validateResults.numRemovedCorruptRecords, 0U) << obj; + ASSERT_EQ(validateResults.numRemovedExtraIndexEntries, 0U) << obj; + ASSERT_EQ(validateResults.numInsertedMissingIndexEntries, 0U) << obj; + ASSERT_EQ(validateResults.numDocumentsMovedToLostAndFound, 0U) << obj; + ASSERT_EQ(validateResults.numOutdatedMissingIndexEntry, 0U) << obj; + } + } + } + } + } +} + +// Tests having multiple errors in the column-store index by updating several index entries with an +// invalid value in different parts of the index on a collection with multiple columns and multiple +// documents. +TEST_F(CollectionValidationColumnStoreIndexTest, MultipleInvalidIndexEntryCSI) { + const int numFields = 10; + const int numDocs = 50; + + auto results = validateIndexCorruptions( + kNss, + numFields, + numDocs, + /* column-store index corruptions */ + {{/* corruptedFldIndex */ 5, + /* corruptedDocIndex */ 9, + /* Remove the existing index entry.
*/ + DeletionFault()}, + {/* corruptedFldIndex */ 7, + /* corruptedDocIndex */ 2, + /* Update the current index entry with an invalid value. */ + ReplacementFault()}, + {/* corruptedFldIndex */ 9, + /* corruptedDocIndex */ 500, + /* Insert an extra index entry for a non-existing document. */ + InsertionFault()}, + {/* corruptedFldIndex */ InjectedCorruption::getNonExistentFieldIndex(1), + /* corruptedDocIndex */ 5, + /* Insert an extra index entry for a non-existing field of an existing document. */ + InsertionFault()}, + {/* corruptedFldIndex */ InjectedCorruption::getNonExistentFieldIndex(2), + /* corruptedDocIndex */ 600, + /* Insert an extra index entry for a non-existing field of a non-existing document. */ + InsertionFault()}, + {/* corruptedFldIndex */ 2, + /* corruptedDocIndex */ 33, + /* Update the current index entry with an invalid value. */ + ReplacementFault()}}, + /* doBackgroundValidation */ true); + + ASSERT_EQ(results.size(), 3); + + for (const auto& result : results) { + const auto& validateResults = result.second; + BSONObjBuilder builder; + const bool debugging = true; + validateResults.appendToResultObj(&builder, debugging); + const auto obj = builder.obj(); + ASSERT_FALSE(validateResults.valid) << obj; + + const auto warningsWithoutTransientErrors = omitTransientWarnings(validateResults); + ASSERT_EQ(warningsWithoutTransientErrors.warnings.size(), 2U) << obj; + ASSERT( + equalsAny("Detected 3 missing index entries.", warningsWithoutTransientErrors.warnings)) + << obj; + ASSERT( + equalsAny("Detected 5 extra index entries.", warningsWithoutTransientErrors.warnings)) + << obj; + + ASSERT_EQ(validateResults.errors.size(), 1U) << obj; + ASSERT_EQ(validateResults.errors[0], + "Index with name '$**_columnstore' has inconsistencies.") + << obj; + + ASSERT_EQ(validateResults.missingIndexEntries.size(), 3U) << obj; + ASSERT_EQ(validateResults.extraIndexEntries.size(), 5U) << obj; + ASSERT_EQ(validateResults.corruptRecords.size(), 0U) << obj; + ASSERT_EQ(validateResults.numRemovedCorruptRecords, 0U) << obj; + ASSERT_EQ(validateResults.numRemovedExtraIndexEntries, 0U) << obj; + ASSERT_EQ(validateResults.numInsertedMissingIndexEntries, 0U) << obj; + ASSERT_EQ(validateResults.numDocumentsMovedToLostAndFound, 0U) << obj; + ASSERT_EQ(validateResults.numOutdatedMissingIndexEntry, 0U) << obj; + } +} + +} // namespace } // namespace mongo diff --git a/src/mongo/db/catalog/column_index_consistency.cpp b/src/mongo/db/catalog/column_index_consistency.cpp new file mode 100644 index 00000000000..dfa6d65b7e6 --- /dev/null +++ b/src/mongo/db/catalog/column_index_consistency.cpp @@ -0,0 +1,282 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>.
+ * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/catalog/column_index_consistency.h" +#include "mongo/db/concurrency/exception_util.h" + +namespace mongo { + +int64_t ColumnIndexConsistency::traverseIndex(OperationContext* opCtx, + const IndexCatalogEntry* index, + ProgressMeterHolder& _progress, + ValidateResults* results) { + const auto indexName = index->descriptor()->indexName(); + + // Ensure that this index has an open index cursor. + const auto csiCursorIt = _validateState->getColumnStoreCursors().find(indexName); + invariant(csiCursorIt != _validateState->getColumnStoreCursors().end()); + + const std::unique_ptr<ColumnStore::Cursor>& csiCursor = csiCursorIt->second; + + int64_t numIndexEntries = 0; + // Traverse the index in this for loop. + for (auto cell = csiCursor->seekAtOrPast("", ColumnStore::kNullRowId); cell; + cell = csiCursor->next()) { + { + stdx::unique_lock<Client> lk(*opCtx->getClient()); + _progress.get(lk)->hit(); + } + ++numIndexEntries; + + if (numIndexEntries % kInterruptIntervalNumRecords == 0) { + // Periodically checks for interrupts and yields. + opCtx->checkForInterrupt(); + _validateState->yield(opCtx); + } + + if (_firstPhase) { + addIndexEntry(cell.get()); + } else { + _updateSuspectList(cell.get(), results); + } + } + + return numIndexEntries; +} + +void ColumnIndexConsistency::traverseRecord(OperationContext* opCtx, + const CollectionPtr& coll, + const IndexCatalogEntry* index, + const RecordId& recordId, + const BSONObj& record, + ValidateResults* results) { + ColumnStoreAccessMethod* csam = dynamic_cast<ColumnStoreAccessMethod*>(index->accessMethod()); + + // Shred the record. + csam->getKeyGen().visitCellsForInsert( + record, [&](PathView path, const column_keygen::UnencodedCellView& unencodedCell) { + _cellBuilder.reset(); + column_keygen::writeEncodedCell(unencodedCell, &_cellBuilder); + tassert( + 7106112, "RecordID cannot be a string for column store indexes", !recordId.isStr()); + const auto cell = FullCellView{ + path, recordId.getLong(), CellView(_cellBuilder.buf(), _cellBuilder.len())}; + if (_firstPhase) { + addDocEntry(cell); + } else { + _updateSuspectList(cell, results); + } + }); +} + +void ColumnIndexConsistency::_addIndexEntryErrors(OperationContext* opCtx, + const IndexCatalogEntry* index, + ValidateResults* results) { + + if (_firstPhase) { + return; + } + + const auto indexName = index->descriptor()->indexName(); + + // Ensure that this index has an open index cursor. 
+ const auto csiCursorIt = _validateState->getColumnStoreCursors().find(indexName); + invariant(csiCursorIt != _validateState->getColumnStoreCursors().end()); + + const auto& csiCursor = csiCursorIt->second; + + const ColumnStoreAccessMethod* csam = + dynamic_cast<ColumnStoreAccessMethod*>(index->accessMethod()); + + for (const auto rowId : _suspects) { + // Gather all paths for this RecordId. + StringMap<int> paths; + PathValue nextPath = ""; + while (auto next = csiCursor->seekAtOrPast(nextPath, rowId)) { + if (next->rid < rowId) { + nextPath.assign(next->path.rawData(), next->path.size()); + continue; + } + + if (next->rid == rowId) { + paths[next->path]++; + } + nextPath.assign(next->path.rawData(), next->path.size()); + nextPath += '\x01'; // Next possible path (\0 is not allowed). + } + + const RecordId recordId(rowId); + const auto& rowStoreCursor = _validateState->getTraverseRecordStoreCursor(); + if (const auto record = rowStoreCursor->seekExact(opCtx, recordId); record) { + // Shred the document and check each path/value against csi(path, rowId). + csam->getKeyGen().visitCellsForInsert( + record->data.toBson(), + [&](PathView path, const column_keygen::UnencodedCellView& cell) { + _cellBuilder.reset(); + column_keygen::writeEncodedCell(cell, &_cellBuilder); + const auto rowCell = + FullCellView{path, rowId, CellView(_cellBuilder.buf(), _cellBuilder.len())}; + + const auto csiCell = csiCursor->seekExact(path, rowId); + if (!csiCell) { + // Rowstore has entry that index doesn't + _missingIndexEntries.emplace_back(rowCell); + results->missingIndexEntries.push_back( + _generateInfo(csam->indexName(), recordId, path, rowId)); + } else if (csiCell->value != rowCell.value) { + // Rowstore and index values diverge + _extraIndexEntries.emplace_back(csiCell.get()); + _missingIndexEntries.emplace_back(rowCell); + results->missingIndexEntries.push_back(_generateInfo( + csam->indexName(), recordId, path, rowId, csiCell->value)); + results->extraIndexEntries.push_back( + _generateInfo(csam->indexName(), recordId, path, rowId, rowCell.value)); + } + if (paths.count(rowCell.path) == 1) { + paths.erase(rowCell.path); + } else { + paths[rowCell.path]--; + } + }); + } + + // Extra paths in index that don't exist in the row-store. 
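+        // Any count left in 'paths' after the shredding pass above is an index cell for this
+        // rowId that had no matching cell generated from the document, so it is reported as an
+        // extra index entry below.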
+ for (const auto& kv : paths) { + for (int i = 0; i < kv.second; i++) { + _extraIndexEntries.emplace_back(kv.first, rowId, ""_sd); + results->extraIndexEntries.push_back( + _generateInfo(csam->indexName(), recordId, kv.first, rowId)); + } + } + } + + if (!_missingIndexEntries.empty() || !_extraIndexEntries.empty()) { + StringBuilder ss; + ss << "Index with name '" << csam->indexName() << "' has inconsistencies."; + results->errors.push_back(ss.str()); + results->indexResultsMap.at(csam->indexName()).valid = false; + } + if (!_missingIndexEntries.empty()) { + StringBuilder ss; + ss << "Detected " << _missingIndexEntries.size() << " missing index entries."; + results->warnings.push_back(ss.str()); + results->valid = false; + } + if (!_extraIndexEntries.empty()) { + StringBuilder ss; + ss << "Detected " << _extraIndexEntries.size() << " extra index entries."; + results->warnings.push_back(ss.str()); + results->valid = false; + } +} + +void ColumnIndexConsistency::addIndexEntryErrors(OperationContext* opCtx, + ValidateResults* results) { + int numColumnStoreIndexes = 0; + for (const auto& index : _validateState->getIndexes()) { + const IndexDescriptor* descriptor = index->descriptor(); + if (descriptor->getAccessMethodName() == IndexNames::COLUMN) { + ++numColumnStoreIndexes; + uassert(7106138, + "The current implementation only supports a single column-store index.", + numColumnStoreIndexes <= 1); + _addIndexEntryErrors(opCtx, index.get(), results); + } + } +} + +void ColumnIndexConsistency::repairIndexEntries(OperationContext* opCtx, ValidateResults* results) { + // TODO SERVER-71560 Repair should be implemented. +} + +void ColumnIndexConsistency::validateIndexKeyCount(OperationContext* opCtx, + const IndexCatalogEntry* index, + long long* numRecords, + IndexValidateResults& results) { + // Nothing to do. +} + +// Generate info for the validate() command output. 
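+// An entry might look like (illustrative values): {indexName: "$**_columnstore", recordId: ...,
+// rowId: 5, indexPath: "a2", value: ...}; 'rowId', 'indexPath', and 'value' are appended only
+// when present.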
+BSONObj ColumnIndexConsistency::_generateInfo(const std::string& indexName, + const RecordId& recordId, + const PathView path, + RowId rowId, + StringData value) { + BSONObjBuilder infoBuilder; + + infoBuilder.append("indexName", indexName); + recordId.serializeToken("recordId", &infoBuilder); + if (rowId != ColumnStore::kNullRowId) { + infoBuilder.append("rowId", rowId); + } + if (!path.empty()) { + infoBuilder.append("indexPath", path); + } + if (!value.empty()) { + infoBuilder.append("value", value); + } + return infoBuilder.obj(); +} + +void ColumnIndexConsistency::addDocEntry(const FullCellView& val) { + _tabulateEntry(val, 1); + _numDocs++; +} + +void ColumnIndexConsistency::addIndexEntry(const FullCellView& val) { + _tabulateEntry(val, -1); + _numIndexEntries++; +} + +void ColumnIndexConsistency::_updateSuspectList(const FullCellView& cell, + ValidateResults* results) { + const auto rawHash = _hash(cell); + const auto hashLower = rawHash % _indexKeyBuckets.size(); + const auto hashUpper = (rawHash / _indexKeyBuckets.size()) % _indexKeyBuckets.size(); + + if (_indexKeyBuckets[hashLower].indexKeyCount != 0 || + _indexKeyBuckets[hashUpper].indexKeyCount != 0) { + _suspects.insert(cell.rid); + } +} + +void ColumnIndexConsistency::_tabulateEntry(const FullCellView& cell, int step) { + const auto rawHash = _hash(cell); + const auto hashLower = rawHash % _indexKeyBuckets.size(); + const auto hashUpper = (rawHash / _indexKeyBuckets.size()) % _indexKeyBuckets.size(); + auto& lower = _indexKeyBuckets[hashLower]; + auto& upper = _indexKeyBuckets[hashUpper]; + const auto cellSz = cell.path.size() + sizeof(cell.rid) + cell.value.size(); + + lower.indexKeyCount += step; + lower.bucketSizeBytes += cellSz; + upper.indexKeyCount += step; + upper.bucketSizeBytes += cellSz; +} +} // namespace mongo diff --git a/src/mongo/db/catalog/column_index_consistency.h b/src/mongo/db/catalog/column_index_consistency.h new file mode 100644 index 00000000000..a503e8c7154 --- /dev/null +++ b/src/mongo/db/catalog/column_index_consistency.h @@ -0,0 +1,201 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
*/ + +#pragma once + +#include <absl/hash/hash.h> +#include <algorithm> +#include <vector> + +#include "mongo/db/catalog/index_consistency.h" +#include "mongo/db/catalog/throttle_cursor.h" +#include "mongo/db/catalog/validate_state.h" +#include "mongo/db/index/columns_access_method.h" +#include "mongo/db/storage/column_store.h" +#include "mongo/stdx/unordered_set.h" +#include "mongo/util/progress_meter.h" + +namespace mongo { + +/** + * The ColumnIndexConsistency class is used to keep track of the index consistency in column-store + * indexes. It does this by using the index keys from index entries and index keys generated from + * the document to ensure there is a one-to-one mapping for each key. + * In addition, an IndexObserver class can be hooked into the IndexAccessMethod to inform this class + * about changes to the indexes during a validation and compensate for them. + */ +class ColumnIndexConsistency final : protected IndexConsistency { + +public: + ColumnIndexConsistency(OperationContext* opCtx, + CollectionValidation::ValidateState* validateState, + const size_t numHashBuckets = kNumHashBuckets) + : IndexConsistency(opCtx, validateState, numHashBuckets) {} + + void setSecondPhase() { + IndexConsistency::setSecondPhase(); + } + + /** + * Traverses the column-store index via 'cursor' and accumulates the traversal results. + */ + int64_t traverseIndex(OperationContext* opCtx, + const IndexCatalogEntry* index, + ProgressMeterHolder& _progress, + ValidateResults* results); + + /** + * Traverses all paths in a single record from the row-store via the given {'recordId','recordBson'} + * pair and accumulates the traversal results. + */ + void traverseRecord(OperationContext* opCtx, + const CollectionPtr& coll, + const IndexCatalogEntry* index, + const RecordId& recordId, + const BSONObj& recordBson, + ValidateResults* results); + + /** + * Returns true if any count in the `_indexKeyBuckets` vector is not equal to 0; otherwise + * returns false. + */ + bool haveEntryMismatch() const { + return std::any_of(_indexKeyBuckets.cbegin(), _indexKeyBuckets.cend(), [](auto b) { + return b.indexKeyCount != 0; + }); + } + + /** + * If repair mode is enabled, tries inserting _missingIndexEntries into the indexes. + */ + void repairIndexEntries(OperationContext* opCtx, ValidateResults* results); + + /** + * Records the errors gathered from the second phase of index validation into the provided + * ValidateResultsMap and ValidateResults. + */ + void addIndexEntryErrors(OperationContext* opCtx, ValidateResults* results); + + /** + * Sets up this IndexConsistency object to limit memory usage in the second phase of index + * validation. Returns whether the memory limit is sufficient to report at least one index entry + * inconsistency and continue with the second phase of validation.
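+ * Note: as the body below shows, this column-store implementation does not enforce a memory
+ * limit yet and always reports that the second phase can proceed.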
+ */ + bool limitMemoryUsageForSecondPhase(ValidateResults* result) { + return true; + } + + void validateIndexKeyCount(OperationContext* opCtx, + const IndexCatalogEntry* index, + long long* numRecords, + IndexValidateResults& results); + + uint64_t getTotalIndexKeys() { + return _numIndexEntries; + } + + ////////////////////////////////////////////////////////// + // Beginning of methods being public for testing purposes + ////////////////////////////////////////////////////////// + int64_t getNumDocs() const { + return _numDocs; + } + + uint32_t getBucketCount(size_t bucketNum) const { + invariant(bucketNum < _indexKeyBuckets.size()); + return _indexKeyBuckets[bucketNum].indexKeyCount; + } + + uint32_t getBucketSizeBytes(size_t bucketNum) const { + invariant(bucketNum < _indexKeyBuckets.size()); + return _indexKeyBuckets[bucketNum].bucketSizeBytes; + } + + /** + * Accumulates the info about a cell extracted from a shredded row-store record. + */ + void addDocEntry(const FullCellView& val); + + /** + * Accumulates the info about a column-store index entry cell. + */ + void addIndexEntry(const FullCellView& val); + + static uint32_t _hash(const FullCellView& cell, uint64_t seed = 0) { + MONGO_STATIC_ASSERT_MSG(std::is_same<decltype(cell.rid), int64_t>::value, + "'rid' should be an int64 (i.e., it's not a class) so that we're " + "not hashing based on the compiler chosen layout for a struct."); + + MONGO_STATIC_ASSERT_MSG(sizeof(uint64_t) <= sizeof(size_t), + "Unable to safely store a uint64_t value in a size_t variable"); + + size_t hash = static_cast<size_t>(seed); + boost::hash_combine(hash, + absl::hash_internal::CityHash64(cell.path.rawData(), cell.path.size())); + boost::hash_combine(hash, + absl::hash_internal::CityHash64( + reinterpret_cast<const char*>(&cell.rid), sizeof(cell.rid))); + boost::hash_combine( + hash, absl::hash_internal::CityHash64(cell.value.rawData(), cell.value.size())); + return hash; + } + ////////////////////////////////////////////////////////// + // End of methods being public for testing purposes + ////////////////////////////////////////////////////////// + +private: + ColumnIndexConsistency() = delete; + + /** + * Pinpoints the errors from the accumulated information from traversal of both row-store and + * column-store index and adds these errors to 'results'. + */ + void _addIndexEntryErrors(OperationContext* opCtx, + const IndexCatalogEntry* index, + ValidateResults* results); + + void _tabulateEntry(const FullCellView& cell, int step); + + BSONObj _generateInfo(const std::string& indexName, + const RecordId& recordId, + PathView path, + RowId rowId, + StringData value = ""); + + void _updateSuspectList(const FullCellView& cell, ValidateResults* results); + + std::vector<FullCellValue> _missingIndexEntries; + std::vector<FullCellValue> _extraIndexEntries; + + stdx::unordered_set<RowId> _suspects; + int64_t _numDocs = 0; + int64_t _numIndexEntries = 0; + BufBuilder _cellBuilder; +}; +} // namespace mongo diff --git a/src/mongo/db/catalog/column_index_consistency_test.cpp b/src/mongo/db/catalog/column_index_consistency_test.cpp new file mode 100644 index 00000000000..0199b9627e1 --- /dev/null +++ b/src/mongo/db/catalog/column_index_consistency_test.cpp @@ -0,0 +1,261 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/catalog/catalog_test_fixture.h" +#include "mongo/db/catalog/column_index_consistency.h" +#include "mongo/idl/server_parameter_test_util.h" +#include "mongo/stdx/unordered_set.h" +#include "mongo/unittest/unittest.h" + +namespace mongo { + +namespace { + +const NamespaceString kNss = NamespaceString("test.t"); + +class ColumnIndexConsistencyTest : public CatalogTestFixture { +protected: + ColumnIndexConsistencyTest(Options options = {}) : CatalogTestFixture(std::move(options)) {} + + ColumnIndexConsistency createColumnStoreConsistency( + const size_t numHashBuckets = IndexConsistency::kNumHashBuckets) { + return ColumnIndexConsistency(operationContext(), _validateState.get(), numHashBuckets); + } + + void setUp() override { + CatalogTestFixture::setUp(); + + auto opCtx = operationContext(); + + // Create collection kNss for unit tests to use. It will possess a default _id index. 
+ const CollectionOptions defaultCollectionOptions; + ASSERT_OK(storageInterface()->createCollection(opCtx, kNss, defaultCollectionOptions)); + + _validateState = std::make_unique<CollectionValidation::ValidateState>( + opCtx, + kNss, + CollectionValidation::ValidateMode::kForeground, + CollectionValidation::RepairMode::kNone); + }; + + void tearDown() override { + _validateState.reset(); + CatalogTestFixture::tearDown(); + } + + std::unique_ptr<CollectionValidation::ValidateState> _validateState; +}; + +const std::vector<FullCellView> kEntries{ + {"path1"_sd, 1, "value1"_sd}, + {"path1"_sd, 2, "value1"_sd}, + {"path1"_sd, 3, "value1"_sd}, + + {"path1"_sd, 4, "value1"_sd}, + {"path2"_sd, 4, "value1"_sd}, + {"path3"_sd, 4, "value1"_sd}, + + {"path4"_sd, 5, "value1"_sd}, + {"path4"_sd, 5, "value2"_sd}, + {"path4"_sd, 5, "value3"_sd}, + + {"path4"_sd, 0x1000'0000'0000'0001, "value3"_sd}, + {"path4"_sd, 0x2000'0000'0000'0001, "value3"_sd}, + {"path4"_sd, 0x3000'0000'0000'0001, "value3"_sd}, + + {"path44"_sd, 5, "value3"_sd}, + {"path4"_sd, 5, "value33"_sd}, + + {"path1"_sd, 1, "1value1"_sd}, + {"path1"_sd, 2, "1value1"_sd}, + {"path1"_sd, 3, "1value1"_sd}, + + {"path1"_sd, 4, "1value1"_sd}, + {"path2"_sd, 4, "1value1"_sd}, + {"path3"_sd, 4, "1value1"_sd}, + + {"path4"_sd, 5, "1value1"_sd}, + {"path4"_sd, 5, "1value2"_sd}, + {"path4"_sd, 5, "1value3"_sd}, + + {"path4"_sd, 0x1000'0000'0000'0001, "value32"_sd}, + {"path4"_sd, 0x2000'0000'0000'0001, "value33"_sd}, + {"path4"_sd, 0x3000'0000'0000'0001, "value34"_sd}, + + {"path44"_sd, 5, "4value3"_sd}, + {"path4"_sd, 5, "4value33"_sd}, + + // A couple duplicates + {"path44"_sd, 5, "4value3"_sd}, + {"path4"_sd, 5, "4value33"_sd}, + + {"path.some.path"_sd, 5, "a dog likes to chew"_sd}, + {"path.some.path"_sd, 6, "a dog likes to bite"_sd}, + {"path.some.path"_sd, 7, "a dog likes to sleep"_sd}, + {"path.some.path"_sd, 8, "a dog likes to eat"_sd}, + {"path.some.path"_sd, 9, "a dog likes to walk"_sd}, + {"path.some.path"_sd, 10, "a dog likes to play"_sd}, + {"path.some.path"_sd, 11, "a dog likes to drool"_sd}, + {"path.some.path"_sd, 12, "a dog likes to sniff"_sd}, + {"path.some.path"_sd, 13, "a dog likes to snuggle"_sd}, + {"path.some.path"_sd, 14, "a dog likes to go to the park with other dogs"_sd}, + + {"path.some.otherpath"_sd, 1, "life is understood backwards but lived forwards"_sd}, + {"path.some.otherpath"_sd, 2, "the key is not spending time but investing it"_sd}, + {"path.some.otherpath"_sd, 3, "if we take care of the moments, the years"_sd}, + {"path.some.otherpath"_sd, 4, "time is an illusion"_sd}, + {"path.some.otherpath"_sd, 5, "life and times"_sd}, + {"path.some.otherpath"_sd, 667, "how many more times"_sd}, + + {"still.some.otherpath"_sd, 667, "islands in the stream"_sd}, + {"still.some.otherpath"_sd, 668, "the gambler"_sd}, + {"still.some.otherpath"_sd, 669, "coward of the county"_sd}, + {"still.some.otherpath"_sd, 662, "lady"_sd}, + {"still.some.otherpath"_sd, 661, "we've got tonight"_sd}, + {"still.some.otherpath"_sd, 660, "through the years"_sd}, +}; + +TEST_F(ColumnIndexConsistencyTest, VerifyHashFunction) { + stdx::unordered_set<int64_t> hashes; + for (const auto& v : kEntries) { + hashes.insert(ColumnIndexConsistency::_hash(v)); + } + ASSERT_EQ(hashes.size(), 50u); +} + +TEST_F(ColumnIndexConsistencyTest, VerifyEntriesEqual) { + ColumnIndexConsistency cic = createColumnStoreConsistency(); + + for (const auto& v : kEntries) { + cic.addDocEntry(v); + } + for (const auto& v : kEntries) { + cic.addIndexEntry(v); + } + 
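    // [Editorial note, not part of the patch: every cell is hashed into two buckets (see
    // _tabulateEntry in the .cpp file above); addDocEntry() steps both buckets by +1 and
    // addIndexEntry() steps them by -1, so a one-to-one match between document cells and
    // index entries must leave every bucket at zero, which the assertions below verify.]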
ASSERT_FALSE(cic.haveEntryMismatch()); + ASSERT_EQ(cic.getNumDocs(), cic.getTotalIndexKeys()); +} + +TEST_F(ColumnIndexConsistencyTest, VerifyEntriesNotEqual) { + ColumnIndexConsistency cic = createColumnStoreConsistency(); + + for (const auto& v : kEntries) { + cic.addDocEntry(v); + } + cic.addDocEntry({"path444"_sd, 4, "value1"_sd}); + for (const auto& v : kEntries) { + cic.addIndexEntry(v); + } + cic.addIndexEntry({"path444"_sd, 4, "value10"_sd}); + ASSERT_TRUE(cic.haveEntryMismatch()); + ASSERT_EQ(cic.getNumDocs(), cic.getTotalIndexKeys()); +} + +TEST_F(ColumnIndexConsistencyTest, VerifyUnorderedEntriesEqual) { + ColumnIndexConsistency cic = createColumnStoreConsistency(); + + for (int i = 0; i < 10000; i++) { + cic.addDocEntry({"foo"_sd, i, "bar"_sd}); + } + + for (int i = 9999; i >= 0; i--) { + cic.addIndexEntry({"foo"_sd, i, "bar"_sd}); + } + ASSERT_EQ(cic.getNumDocs(), 10000u); + ASSERT_EQ(cic.getNumDocs(), cic.getTotalIndexKeys()); + ASSERT_FALSE(cic.haveEntryMismatch()); + + cic.addDocEntry({"foo"_sd, 5311, "bar"_sd}); + ASSERT_NE(cic.getNumDocs(), cic.getTotalIndexKeys()); + ASSERT_TRUE(cic.haveEntryMismatch()); + + cic.addIndexEntry({"foo"_sd, 5311, "bar"_sd}); + ASSERT_EQ(cic.getNumDocs(), cic.getTotalIndexKeys()); + ASSERT_FALSE(cic.haveEntryMismatch()); + + for (const auto& v : kEntries) { + cic.addDocEntry(v); + } + for (auto it = kEntries.crbegin(); it != kEntries.crend(); it++) { + cic.addIndexEntry(*it); + } + ASSERT_FALSE(cic.haveEntryMismatch()); + ASSERT_EQ(cic.getNumDocs(), cic.getTotalIndexKeys()); +} + +TEST_F(ColumnIndexConsistencyTest, VerifyEvenBucketDistribution) { + const size_t numBuckets = 1000; + ColumnIndexConsistency cic = createColumnStoreConsistency(numBuckets); + + for (int i = 0; i < 10000; i++) { + cic.addDocEntry({"foo"_sd, i, "bar"_sd}); + } + + uint32_t maxValue = 0; + for (size_t i = 0; i < numBuckets; i++) { + maxValue = std::max(cic.getBucketCount(i), maxValue); + } + ASSERT_LT(maxValue, uint32_t(40)); // perfect would be 20, so 2x that is our made-up threshold +} + +TEST_F(ColumnIndexConsistencyTest, VerifyEvenBucketDistributionFewerInputs) { + // fewer, more varied inputs + const size_t numBuckets = 5; + ColumnIndexConsistency cic = createColumnStoreConsistency(numBuckets); + for (const auto& v : kEntries) { + cic.addDocEntry(v); + } + uint32_t maxValue = 0; + for (size_t i = 0; i < numBuckets; i++) { + maxValue = std::max(cic.getBucketCount(i), maxValue); + } + ASSERT_LT(maxValue, uint32_t(40)); // perfect would be 20, 2x that is our made-up threshold +} + +TEST_F(ColumnIndexConsistencyTest, VerifySizeCounters) { + const size_t numBuckets = 5; + ColumnIndexConsistency cic = createColumnStoreConsistency(numBuckets); + + cic.addDocEntry({""_sd, 1, "b"_sd}); // 9 bytes + cic.addDocEntry({"f"_sd, 0, ""_sd}); // 9 bytes + cic.addDocEntry({"f"_sd, 1, "b"_sd}); // 10 bytes + + cic.addIndexEntry({""_sd, 1, "b"_sd}); + cic.addIndexEntry({"f"_sd, 0, ""_sd}); + cic.addIndexEntry({"f"_sd, 1, "b"_sd}); + + size_t totalSize = 0; + for (size_t i = 0; i < numBuckets; i++) { + totalSize += cic.getBucketSizeBytes(i); + } + ASSERT_EQ(totalSize, 112u); // 28 * 2 (hashed twice) * 2 (hash doc + index) = 112 +} + +} // namespace +} // namespace mongo diff --git a/src/mongo/db/catalog/database_impl.cpp b/src/mongo/db/catalog/database_impl.cpp index 54d9b3fd930..47416142ee4 100644 --- a/src/mongo/db/catalog/database_impl.cpp +++ b/src/mongo/db/catalog/database_impl.cpp @@ -57,6 +57,7 @@ #include "mongo/db/query/query_knobs_gen.h" #include 
"mongo/db/repl/drop_pending_collection_reaper.h" #include "mongo/db/repl/oplog.h" +#include "mongo/db/repl/replication_consistency_markers_impl.h" #include "mongo/db/repl/replication_coordinator.h" #include "mongo/db/s/collection_sharding_state.h" #include "mongo/db/s/database_sharding_state.h" @@ -749,6 +750,21 @@ Collection* DatabaseImpl::createVirtualCollection(OperationContext* opCtx, vopts); } +/** + * Some system collections (namely, 'config.transactions' and + * 'local.replset.oplogTruncateAfterPoint') are special internal collections that are written to + * without updating indexes, so there's no value in creating an index on them. + * + * @return true if any modification on the collection data leads to updating the indexes defined on + * it. + */ +bool doesCollectionModificationsUpdateIndexes(StringData collName) { + // TODO SERVER-72496: investigate whether 'config.transactions' should remain here or not. + return collName != "config.transactions" && + collName != + repl::ReplicationConsistencyMarkersImpl::kDefaultOplogTruncateAfterPointNamespace; +} + Collection* DatabaseImpl::_createCollection( OperationContext* opCtx, const NamespaceString& nss, @@ -856,7 +872,8 @@ Collection* DatabaseImpl::_createCollection( opCtx, collection, !idIndex.isEmpty() ? idIndex : ic->getDefaultIdIndexSpec(collection))); - createColumnIndex = createColumnIndexOnAllCollections.shouldFail(); + createColumnIndex = createColumnIndexOnAllCollections.shouldFail() && + doesCollectionModificationsUpdateIndexes(nss.ns()); } else { // autoIndexId: false is only allowed on unreplicated collections. uassert(50001, diff --git a/src/mongo/db/catalog/index_consistency.cpp b/src/mongo/db/catalog/index_consistency.cpp index 40dec646b08..f16bb7ebfa3 100644 --- a/src/mongo/db/catalog/index_consistency.cpp +++ b/src/mongo/db/catalog/index_consistency.cpp @@ -43,17 +43,22 @@ #include "mongo/db/index/index_descriptor.h" #include "mongo/db/multi_key_path_tracker.h" #include "mongo/db/record_id_helpers.h" +#include "mongo/db/storage/execution_context.h" #include "mongo/logv2/log.h" #include "mongo/util/string_map.h" +#include "mongo/util/testing_proctor.h" #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kStorage namespace mongo { +const long long IndexConsistency::kInterruptIntervalNumRecords = 4096; +const size_t IndexConsistency::kNumHashBuckets = 1U << 16; + namespace { -const size_t kNumHashBuckets = 1U << 16; +MONGO_FAIL_POINT_DEFINE(crashOnMultikeyValidateFailure); StringSet::hasher hash; @@ -95,18 +100,31 @@ IndexEntryInfo::IndexEntryInfo(const IndexInfo& indexInfo, keyString(entryKeyString) {} IndexConsistency::IndexConsistency(OperationContext* opCtx, - CollectionValidation::ValidateState* validateState) + CollectionValidation::ValidateState* validateState, + const size_t numHashBuckets) : _validateState(validateState), _firstPhase(true) { - _indexKeyBuckets.resize(kNumHashBuckets); + _indexKeyBuckets.resize(numHashBuckets); +} + +void IndexConsistency::setSecondPhase() { + invariant(_firstPhase); + _firstPhase = false; +} +KeyStringIndexConsistency::KeyStringIndexConsistency( + OperationContext* opCtx, + CollectionValidation::ValidateState* validateState, + const size_t numHashBuckets) + : IndexConsistency(opCtx, validateState, numHashBuckets) { for (const auto& index : _validateState->getIndexes()) { - const IndexDescriptor* descriptor = index->descriptor(); + const auto descriptor = index->descriptor(); IndexAccessMethod* accessMethod = const_cast<IndexAccessMethod*>(index->accessMethod()); 
_indexesInfo.emplace(descriptor->indexName(), IndexInfo(descriptor, accessMethod)); } } -void IndexConsistency::addMultikeyMetadataPath(const KeyString::Value& ks, IndexInfo* indexInfo) { +void KeyStringIndexConsistency::addMultikeyMetadataPath(const KeyString::Value& ks, + IndexInfo* indexInfo) { auto hash = _hashKeyString(ks, indexInfo->indexNameHash); if (MONGO_unlikely(_validateState->extraLoggingForTest())) { LOGV2(6208500, @@ -117,8 +135,8 @@ void IndexConsistency::addMultikeyMetadataPath(const KeyString::Value& ks, Index indexInfo->hashedMultikeyMetadataPaths.emplace(hash); } -void IndexConsistency::removeMultikeyMetadataPath(const KeyString::Value& ks, - IndexInfo* indexInfo) { +void KeyStringIndexConsistency::removeMultikeyMetadataPath(const KeyString::Value& ks, + IndexInfo* indexInfo) { auto hash = _hashKeyString(ks, indexInfo->indexNameHash); if (MONGO_unlikely(_validateState->extraLoggingForTest())) { LOGV2(6208501, @@ -129,23 +147,18 @@ void IndexConsistency::removeMultikeyMetadataPath(const KeyString::Value& ks, indexInfo->hashedMultikeyMetadataPaths.erase(hash); } -size_t IndexConsistency::getMultikeyMetadataPathCount(IndexInfo* indexInfo) { +size_t KeyStringIndexConsistency::getMultikeyMetadataPathCount(IndexInfo* indexInfo) { return indexInfo->hashedMultikeyMetadataPaths.size(); } -bool IndexConsistency::haveEntryMismatch() const { +bool KeyStringIndexConsistency::haveEntryMismatch() const { return std::any_of(_indexKeyBuckets.begin(), _indexKeyBuckets.end(), [](const IndexKeyBucket& bucket) -> bool { return bucket.indexKeyCount; }); } -void IndexConsistency::setSecondPhase() { - invariant(_firstPhase); - _firstPhase = false; -} - -void IndexConsistency::repairMissingIndexEntries(OperationContext* opCtx, - ValidateResults* results) { +void KeyStringIndexConsistency::repairIndexEntries(OperationContext* opCtx, + ValidateResults* results) { invariant(_validateState->getIndexes().size() > 0); std::shared_ptr<const IndexCatalogEntry> index = _validateState->getIndexes().front(); for (auto it = _missingIndexEntries.begin(); it != _missingIndexEntries.end();) { @@ -194,7 +207,8 @@ void IndexConsistency::repairMissingIndexEntries(OperationContext* opCtx, } } -void IndexConsistency::addIndexEntryErrors(ValidateResults* results) { +void KeyStringIndexConsistency::addIndexEntryErrors(OperationContext* opCtx, + ValidateResults* results) { invariant(!_firstPhase); // We'll report up to 1MB for extra index entry errors and missing index entry errors. 
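[Editorial sketch, not part of the patch: the bucket arithmetic shared by addDocKey() and addIndexKey() (renamed in the hunks below) is easy to miss inside the diff, so here it is in miniature. Every name in this snippet is illustrative; it mirrors the hashLower/hashUpper scheme but swaps MongoDB's unsigned 32-bit counters, which rely on wraparound and a two's-complement reading, for plain signed counters.]

    #include <cstdint>
    #include <vector>

    // Toy model of the hashed-bucket consistency check: each key derives two bucket
    // indices from one hash; document keys increment both, index keys decrement both.
    // Any bucket left non-zero after both passes flags a possible inconsistency.
    struct ToyBuckets {
        explicit ToyBuckets(std::size_t n) : counts(n, 0) {}

        void addDocKey(std::uint64_t rawHash) {
            step(rawHash, +1);
        }
        void addIndexKey(std::uint64_t rawHash) {
            step(rawHash, -1);
        }
        bool haveEntryMismatch() const {
            for (std::int64_t c : counts)
                if (c != 0)
                    return true;
            return false;
        }

    private:
        void step(std::uint64_t rawHash, std::int64_t delta) {
            counts[rawHash % counts.size()] += delta;                    // "lower" bucket
            counts[(rawHash / counts.size()) % counts.size()] += delta;  // "upper" bucket
        }

        std::vector<std::int64_t> counts;
    };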
@@ -290,8 +304,8 @@ void IndexConsistency::addIndexEntryErrors(ValidateResults* results) { } } -void IndexConsistency::addDocumentMultikeyPaths(IndexInfo* indexInfo, - const MultikeyPaths& newPaths) { +void KeyStringIndexConsistency::addDocumentMultikeyPaths(IndexInfo* indexInfo, + const MultikeyPaths& newPaths) { invariant(newPaths.size()); if (indexInfo->docMultikeyPaths.size()) { MultikeyPathTracker::mergeMultikeyPaths(&indexInfo->docMultikeyPaths, newPaths); @@ -301,10 +315,10 @@ void IndexConsistency::addDocumentMultikeyPaths(IndexInfo* indexInfo, } } -void IndexConsistency::addDocKey(OperationContext* opCtx, - const KeyString::Value& ks, - IndexInfo* indexInfo, - const RecordId& recordId) { +void KeyStringIndexConsistency::addDocKey(OperationContext* opCtx, + const KeyString::Value& ks, + IndexInfo* indexInfo, + const RecordId& recordId) { auto rawHash = ks.hash(indexInfo->indexNameHash); auto hashLower = rawHash % kNumHashBuckets; auto hashUpper = (rawHash / kNumHashBuckets) % kNumHashBuckets; @@ -358,11 +372,11 @@ void IndexConsistency::addDocKey(OperationContext* opCtx, } } -void IndexConsistency::addIndexKey(OperationContext* opCtx, - const KeyString::Value& ks, - IndexInfo* indexInfo, - const RecordId& recordId, - ValidateResults* results) { +void KeyStringIndexConsistency::addIndexKey(OperationContext* opCtx, + const KeyString::Value& ks, + IndexInfo* indexInfo, + const RecordId& recordId, + ValidateResults* results) { auto rawHash = ks.hash(indexInfo->indexNameHash); auto hashLower = rawHash % kNumHashBuckets; auto hashUpper = (rawHash / kNumHashBuckets) % kNumHashBuckets; @@ -442,7 +456,7 @@ void IndexConsistency::addIndexKey(OperationContext* opCtx, } } -bool IndexConsistency::limitMemoryUsageForSecondPhase(ValidateResults* result) { +bool KeyStringIndexConsistency::limitMemoryUsageForSecondPhase(ValidateResults* result) { invariant(!_firstPhase); const uint32_t maxMemoryUsageBytes = maxValidateMemoryUsageMB.load() * 1024 * 1024; @@ -508,11 +522,436 @@ bool IndexConsistency::limitMemoryUsageForSecondPhase(ValidateResults* result) { return true; } -BSONObj IndexConsistency::_generateInfo(const std::string& indexName, - const BSONObj& keyPattern, - const RecordId& recordId, - const BSONObj& indexKey, - const BSONObj& idKey) { +void KeyStringIndexConsistency::validateIndexKeyCount(OperationContext* opCtx, + const IndexCatalogEntry* index, + long long* numRecords, + IndexValidateResults& results) { + // Fetch the total number of index entries we previously found traversing the index. + const IndexDescriptor* desc = index->descriptor(); + const std::string indexName = desc->indexName(); + IndexInfo* indexInfo = &this->getIndexInfo(indexName); + const auto numTotalKeys = indexInfo->numKeys; + + // Update numRecords by subtracting number of records removed from record store in repair mode + // when validating index consistency + (*numRecords) -= results.keysRemovedFromRecordStore; + + // Do not fail on finding too few index entries compared to collection entries when full:false. 
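    // [Editorial summary, not part of the patch: the checks below are (1) the _id index must
    // have exactly one entry per document, (2) hashed indexes may never be multikey, (3) a
    // non-multikey, non-wildcard index may not have more entries than there are records, and
    // (4) a plain non-sparse, non-partial index may not have fewer; shortfalls are demoted to
    // warnings unless this is a {full: true} validation.]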
+ bool hasTooFewKeys = false; + const bool noErrorOnTooFewKeys = !_validateState->isFullIndexValidation(); + + if (desc->isIdIndex() && numTotalKeys != (*numRecords)) { + hasTooFewKeys = (numTotalKeys < (*numRecords)); + const std::string msg = str::stream() + << "number of _id index entries (" << numTotalKeys + << ") does not match the number of documents in the index (" << (*numRecords) << ")"; + if (noErrorOnTooFewKeys && (numTotalKeys < (*numRecords))) { + results.warnings.push_back(msg); + } else { + results.errors.push_back(msg); + results.valid = false; + } + } + + // Hashed indexes may never be multikey. + if (desc->getAccessMethodName() == IndexNames::HASHED && + index->isMultikey(opCtx, _validateState->getCollection())) { + results.errors.push_back(str::stream() << "Hashed index is incorrectly marked multikey: " + << desc->indexName()); + results.valid = false; + } + + // Confirm that the number of index entries is not greater than the number of documents in the + // collection. This check is only valid for indexes that are not multikey (indexed arrays + // produce an index key per array entry) and not $** indexes which can produce index keys for + // multiple paths within a single document. + if (results.valid && !index->isMultikey(opCtx, _validateState->getCollection()) && + desc->getIndexType() != IndexType::INDEX_WILDCARD && numTotalKeys > (*numRecords)) { + const std::string err = str::stream() + << "index " << desc->indexName() << " is not multi-key, but has more entries (" + << numTotalKeys << ") than documents in the index (" << (*numRecords) << ")"; + results.errors.push_back(err); + results.valid = false; + } + + // Ignore any indexes with a special access method. If an access method name is given, the + // index may be a full text, geo or special index plugin with different semantics. + if (results.valid && !desc->isSparse() && !desc->isPartial() && !desc->isIdIndex() && + desc->getAccessMethodName() == "" && numTotalKeys < (*numRecords)) { + hasTooFewKeys = true; + const std::string msg = str::stream() + << "index " << desc->indexName() << " is not sparse or partial, but has fewer entries (" + << numTotalKeys << ") than documents in the index (" << (*numRecords) << ")"; + if (noErrorOnTooFewKeys) { + results.warnings.push_back(msg); + } else { + results.errors.push_back(msg); + results.valid = false; + } + } + + if (!_validateState->isFullIndexValidation() && hasTooFewKeys) { + const std::string warning = str::stream() + << "index " << desc->indexName() << " has fewer keys than records." + << " Please re-run the validate command with {full: true}"; + results.warnings.push_back(warning); + } +} + +namespace { +// Ensures that index entries are in increasing or decreasing order. +void _validateKeyOrder(OperationContext* opCtx, + const IndexCatalogEntry* index, + const KeyString::Value& currKey, + const KeyString::Value& prevKey, + IndexValidateResults* results) { + const auto descriptor = index->descriptor(); + const bool unique = descriptor->unique(); + + // KeyStrings will be in strictly increasing order because all keys are sorted and they are in + // the format (Key, RID), and all RecordIDs are unique. + if (currKey.compare(prevKey) <= 0) { + if (results && results->valid) { + results->errors.push_back(str::stream() + << "index '" << descriptor->indexName() + << "' is not in strictly ascending or descending order"); + } + if (results) { + results->valid = false; + } + return; + } + + if (unique) { + // Unique indexes must not have duplicate keys. 
+ const int cmp = currKey.compareWithoutRecordIdLong(prevKey); + if (cmp != 0) { + return; + } + + if (results && results->valid) { + const auto bsonKey = + KeyString::toBson(currKey, Ordering::make(descriptor->keyPattern())); + const auto firstRecordId = + KeyString::decodeRecordIdLongAtEnd(prevKey.getBuffer(), prevKey.getSize()); + const auto secondRecordId = + KeyString::decodeRecordIdLongAtEnd(currKey.getBuffer(), currKey.getSize()); + results->errors.push_back(str::stream() << "Unique index '" << descriptor->indexName() + << "' has duplicate key: " << bsonKey + << ", first record: " << firstRecordId + << ", second record: " << secondRecordId); + } + if (results) { + results->valid = false; + } + } +} +} // namespace + +int64_t KeyStringIndexConsistency::traverseIndex(OperationContext* opCtx, + const IndexCatalogEntry* index, + ProgressMeterHolder& _progress, + ValidateResults* results) { + const auto descriptor = index->descriptor(); + const auto indexName = descriptor->indexName(); + auto& indexResults = results->indexResultsMap[indexName]; + IndexInfo& indexInfo = this->getIndexInfo(indexName); + int64_t numKeys = 0; + + bool isFirstEntry = true; + + const KeyString::Version version = + index->accessMethod()->asSortedData()->getSortedDataInterface()->getKeyStringVersion(); + + KeyString::Builder firstKeyStringBuilder( + version, BSONObj(), indexInfo.ord, KeyString::Discriminator::kExclusiveBefore); + const KeyString::Value firstKeyString = firstKeyStringBuilder.getValueCopy(); + KeyString::Value prevIndexKeyStringValue; + + // Ensure that this index has an open index cursor. + const auto indexCursorIt = _validateState->getIndexCursors().find(indexName); + invariant(indexCursorIt != _validateState->getIndexCursors().end()); + + const std::unique_ptr<SortedDataInterfaceThrottleCursor>& indexCursor = indexCursorIt->second; + + boost::optional<KeyStringEntry> indexEntry; + try { + indexEntry = indexCursor->seekForKeyString(opCtx, firstKeyString); + } catch (const DBException& ex) { + if (TestingProctor::instance().isEnabled() && ex.code() != ErrorCodes::WriteConflict) { + LOGV2_FATAL(5318400, + "Error seeking to first key", + "error"_attr = ex.toString(), + "index"_attr = indexName, + "key"_attr = firstKeyString.toString()); + } + throw; + } + + const auto keyFormat = + index->accessMethod()->asSortedData()->getSortedDataInterface()->rsKeyFormat(); + const RecordId kWildcardMultikeyMetadataRecordId = record_id_helpers::reservedIdFor( + record_id_helpers::ReservationId::kWildcardMultikeyMetadataId, keyFormat); + + // Warn about unique indexes with keys in old format (without record id). + bool foundOldUniqueIndexKeys = false; + + while (indexEntry) { + if (!isFirstEntry) { + _validateKeyOrder( + opCtx, index, indexEntry->keyString, prevIndexKeyStringValue, &indexResults); + } + + if (!foundOldUniqueIndexKeys && !descriptor->isIdIndex() && descriptor->unique() && + !indexCursor->isRecordIdAtEndOfKeyString()) { + results->warnings.push_back( + fmt::format("Unique index {} has one or more keys in the old format (without " + "embedded record id). 
First record: {}", + indexInfo.indexName, + indexEntry->loc.toString())); + foundOldUniqueIndexKeys = true; + } + + const bool isMetadataKey = indexEntry->loc == kWildcardMultikeyMetadataRecordId; + if (descriptor->getIndexType() == IndexType::INDEX_WILDCARD && isMetadataKey) { + this->removeMultikeyMetadataPath(indexEntry->keyString, &indexInfo); + } else { + try { + this->addIndexKey( + opCtx, indexEntry->keyString, &indexInfo, indexEntry->loc, results); + } catch (const DBException& e) { + StringBuilder ss; + ss << "Parsing index key for " << indexInfo.indexName << " recId " + << indexEntry->loc << " threw exception " << e.toString(); + results->errors.push_back(ss.str()); + results->valid = false; + } + } + { + stdx::unique_lock<Client> lk(*opCtx->getClient()); + _progress.get(lk)->hit(); + } + numKeys++; + isFirstEntry = false; + prevIndexKeyStringValue = indexEntry->keyString; + + if (numKeys % kInterruptIntervalNumRecords == 0) { + // Periodically checks for interrupts and yields. + opCtx->checkForInterrupt(); + _validateState->yield(opCtx); + } + + try { + indexEntry = indexCursor->nextKeyString(opCtx); + } catch (const DBException& ex) { + if (TestingProctor::instance().isEnabled() && ex.code() != ErrorCodes::WriteConflict) { + LOGV2_FATAL(5318401, + "Error advancing index cursor", + "error"_attr = ex.toString(), + "index"_attr = indexName, + "prevKey"_attr = prevIndexKeyStringValue.toString()); + } + throw; + } + } + + if (results && this->getMultikeyMetadataPathCount(&indexInfo) > 0) { + results->errors.push_back(str::stream() + << "Index '" << descriptor->indexName() + << "' has one or more missing multikey metadata index keys"); + results->valid = false; + } + + // Adjust multikey metadata when allowed. These states are all allowed by the design of + // multikey. A collection should still be valid without these adjustments. + if (_validateState->adjustMultikey()) { + + // If this collection has documents that make this index multikey, then check whether those + // multikey paths match the index's metadata. + const auto indexPaths = index->getMultikeyPaths(opCtx, _validateState->getCollection()); + const auto& documentPaths = indexInfo.docMultikeyPaths; + if (indexInfo.multikeyDocs && documentPaths != indexPaths) { + LOGV2(5367500, + "Index's multikey paths do not match those of its documents", + "index"_attr = descriptor->indexName(), + "indexPaths"_attr = MultikeyPathTracker::dumpMultikeyPaths(indexPaths), + "documentPaths"_attr = MultikeyPathTracker::dumpMultikeyPaths(documentPaths)); + + // Since we have the correct multikey path information for this index, we can tighten up + // its metadata to improve query performance. This may apply in two distinct scenarios: + // 1. Collection data has changed such that the current multikey paths on the index + // are too permissive and certain document paths are no longer multikey. + // 2. This index was built before 3.4, and there is no multikey path information for + // the index. We can effectively 'upgrade' the index so that it does not need to be + // rebuilt to update this information. 
+ writeConflictRetry(opCtx, "updateMultikeyPaths", _validateState->nss().ns(), [&]() { + WriteUnitOfWork wuow(opCtx); + auto writeableIndex = const_cast<IndexCatalogEntry*>(index); + const bool isMultikey = true; + writeableIndex->forceSetMultikey( + opCtx, _validateState->getCollection(), isMultikey, documentPaths); + wuow.commit(); + }); + + if (results) { + results->warnings.push_back(str::stream() << "Updated index multikey metadata" + << ": " << descriptor->indexName()); + results->repaired = true; + } + } + + // If this index does not need to be multikey, then unset the flag. + if (index->isMultikey(opCtx, _validateState->getCollection()) && !indexInfo.multikeyDocs) { + invariant(!indexInfo.docMultikeyPaths.size()); + + LOGV2(5367501, + "Index is multikey but there are no multikey documents", + "index"_attr = descriptor->indexName()); + + // This makes an improvement in the case that no documents make the index multikey and + // the flag can be unset entirely. This may be due to a change in the data or historical + // multikey bugs that have persisted incorrect multikey infomation. + writeConflictRetry(opCtx, "unsetMultikeyPaths", _validateState->nss().ns(), [&]() { + WriteUnitOfWork wuow(opCtx); + auto writeableIndex = const_cast<IndexCatalogEntry*>(index); + const bool isMultikey = false; + writeableIndex->forceSetMultikey( + opCtx, _validateState->getCollection(), isMultikey, {}); + wuow.commit(); + }); + + if (results) { + results->warnings.push_back(str::stream() << "Unset index multikey metadata" + << ": " << descriptor->indexName()); + results->repaired = true; + } + } + } + + return numKeys; +} + +void KeyStringIndexConsistency::traverseRecord(OperationContext* opCtx, + const CollectionPtr& coll, + const IndexCatalogEntry* index, + const RecordId& recordId, + const BSONObj& recordBson, + ValidateResults* results) { + const auto iam = index->accessMethod()->asSortedData(); + + const auto descriptor = index->descriptor(); + SharedBufferFragmentBuilder pool(KeyString::HeapBuilder::kHeapAllocatorDefaultBytes); + auto& executionCtx = StorageExecutionContext::get(opCtx); + + const auto documentKeySet = executionCtx.keys(); + const auto multikeyMetadataKeys = executionCtx.multikeyMetadataKeys(); + const auto documentMultikeyPaths = executionCtx.multikeyPaths(); + + iam->getKeys(opCtx, + coll, + pool, + recordBson, + InsertDeleteOptions::ConstraintEnforcementMode::kEnforceConstraints, + SortedDataIndexAccessMethod::GetKeysContext::kAddingKeys, + documentKeySet.get(), + multikeyMetadataKeys.get(), + documentMultikeyPaths.get(), + recordId); + + const bool shouldBeMultikey = + iam->shouldMarkIndexAsMultikey(documentKeySet->size(), + {multikeyMetadataKeys->begin(), multikeyMetadataKeys->end()}, + *documentMultikeyPaths); + + if (!index->isMultikey(opCtx, coll) && shouldBeMultikey) { + if (_validateState->fixErrors()) { + writeConflictRetry(opCtx, "setIndexAsMultikey", coll->ns().ns(), [&] { + WriteUnitOfWork wuow(opCtx); + coll->getIndexCatalog()->setMultikeyPaths( + opCtx, coll, descriptor, *multikeyMetadataKeys, *documentMultikeyPaths); + wuow.commit(); + }); + + LOGV2(4614700, + "Index set to multikey", + "indexName"_attr = descriptor->indexName(), + "collection"_attr = coll->ns().ns()); + results->warnings.push_back(str::stream() << "Index " << descriptor->indexName() + << " set to multikey."); + results->repaired = true; + } else { + auto& curRecordResults = (results->indexResultsMap)[descriptor->indexName()]; + const std::string msg = fmt::format( + "Index {} is not multikey but 
has more than one key in document with " + "RecordId({}) and {}", + descriptor->indexName(), + recordId.toString(), + recordBson.getField("_id").toString()); + curRecordResults.errors.push_back(msg); + curRecordResults.valid = false; + if (crashOnMultikeyValidateFailure.shouldFail()) { + invariant(false, msg); + } + } + } + + if (index->isMultikey(opCtx, coll)) { + const MultikeyPaths& indexPaths = index->getMultikeyPaths(opCtx, coll); + if (!MultikeyPathTracker::covers(indexPaths, *documentMultikeyPaths.get())) { + if (_validateState->fixErrors()) { + writeConflictRetry(opCtx, "increaseMultikeyPathCoverage", coll->ns().ns(), [&] { + WriteUnitOfWork wuow(opCtx); + coll->getIndexCatalog()->setMultikeyPaths( + opCtx, coll, descriptor, *multikeyMetadataKeys, *documentMultikeyPaths); + wuow.commit(); + }); + + LOGV2(4614701, + "Multikey paths updated to cover multikey document", + "indexName"_attr = descriptor->indexName(), + "collection"_attr = coll->ns().ns()); + results->warnings.push_back(str::stream() << "Index " << descriptor->indexName() + << " multikey paths updated."); + results->repaired = true; + } else { + const std::string msg = fmt::format( + "Index {} multikey paths do not cover a document with RecordId({}) and {}", + descriptor->indexName(), + recordId.toString(), + recordBson.getField("_id").toString()); + auto& curRecordResults = (results->indexResultsMap)[descriptor->indexName()]; + curRecordResults.errors.push_back(msg); + curRecordResults.valid = false; + } + } + } + + IndexInfo& indexInfo = this->getIndexInfo(descriptor->indexName()); + if (shouldBeMultikey) { + indexInfo.multikeyDocs = true; + } + + // An empty set of multikey paths indicates that this index does not track path-level + // multikey information and we should do no tracking. + if (shouldBeMultikey && documentMultikeyPaths->size()) { + this->addDocumentMultikeyPaths(&indexInfo, *documentMultikeyPaths); + } + + for (const auto& keyString : *multikeyMetadataKeys) { + this->addMultikeyMetadataPath(keyString, &indexInfo); + } + + for (const auto& keyString : *documentKeySet) { + _totalIndexKeys++; + this->addDocKey(opCtx, keyString, &indexInfo, recordId); + } +} + +BSONObj KeyStringIndexConsistency::_generateInfo(const std::string& indexName, + const BSONObj& keyPattern, + const RecordId& recordId, + const BSONObj& indexKey, + const BSONObj& idKey) { // We need to rehydrate the indexKey for improved readability. // {"": ObjectId(...)} -> {"_id": ObjectId(...)} @@ -543,8 +982,8 @@ BSONObj IndexConsistency::_generateInfo(const std::string& indexName, return infoBuilder.obj(); } -uint32_t IndexConsistency::_hashKeyString(const KeyString::Value& ks, - uint32_t indexNameHash) const { +uint32_t KeyStringIndexConsistency::_hashKeyString(const KeyString::Value& ks, + const uint32_t indexNameHash) const { return ks.hash(indexNameHash); } } // namespace mongo diff --git a/src/mongo/db/catalog/index_consistency.h b/src/mongo/db/catalog/index_consistency.h index ade97d20918..f873e83addf 100644 --- a/src/mongo/db/catalog/index_consistency.h +++ b/src/mongo/db/catalog/index_consistency.h @@ -32,6 +32,7 @@ #include "mongo/bson/simple_bsonobj_comparator.h" #include "mongo/db/catalog/validate_state.h" #include "mongo/db/storage/key_string.h" +#include "mongo/util/progress_meter.h" namespace mongo { @@ -69,8 +70,7 @@ struct IndexInfo { }; /** - * Used by _missingIndexEntries to be able to easily access keyString during - * repairMissingIndexEntries. 
+ * Used by _missingIndexEntries to be able to easily access keyString during repairIndexEntries. */ struct IndexEntryInfo { IndexEntryInfo(const IndexInfo& indexInfo, @@ -85,90 +85,116 @@ struct IndexEntryInfo { KeyString::Value keyString; }; + /** - * The IndexConsistency class is used to keep track of the index consistency. - * It does this by using the index keys from index entries and index keys generated from the - * document to ensure there is a one-to-one mapping for each key. - * In addition, an IndexObserver class can be hooked into the IndexAccessMethod to inform - * this class about changes to the indexes during a validation and compensate for them. + * The IndexConsistency class provides the base class definitions for index-consistency + * sub-classes. The base implementation in this class provides the basis for keeping track of the + * index consistency. It does this by using the index keys from index entries and index keys + * generated from the document to ensure there is a one-to-one mapping for each key. */ -class IndexConsistency final { +class IndexConsistency { using IndexInfoMap = std::map<std::string, IndexInfo>; using IndexKey = std::pair<std::string, std::string>; public: - IndexConsistency(OperationContext* opCtx, CollectionValidation::ValidateState* validateState); + static const long long kInterruptIntervalNumRecords; + static const size_t kNumHashBuckets; - /** - * During the first phase of validation, given the document's key KeyString, increment the - * corresponding `_indexKeyCount` by hashing it. - * For the second phase of validation, keep track of the document keys that hashed to - * inconsistent hash buckets during the first phase of validation. - */ - void addDocKey(OperationContext* opCtx, - const KeyString::Value& ks, - IndexInfo* indexInfo, - const RecordId& recordId); + IndexConsistency(OperationContext* opCtx, + CollectionValidation::ValidateState* validateState, + size_t numHashBuckets = kNumHashBuckets); /** - * During the first phase of validation, given the index entry's KeyString, decrement the - * corresponding `_indexKeyCount` by hashing it. - * For the second phase of validation, try to match the index entry keys that hashed to - * inconsistent hash buckets during the first phase of validation to document keys. + * Informs the IndexConsistency object that we're advancing to the second phase of + * index validation. */ - void addIndexKey(OperationContext* opCtx, - const KeyString::Value& ks, - IndexInfo* indexInfo, - const RecordId& recordId, - ValidateResults* results); + void setSecondPhase(); - /** - * During the first phase of validation, tracks the multikey paths for every observed document. - */ - void addDocumentMultikeyPaths(IndexInfo* indexInfo, const MultikeyPaths& multikeyPaths); + virtual ~IndexConsistency() = default; - /** - * To validate $** multikey metadata paths, we first scan the collection and add a hash of all - * multikey paths encountered to a set. We then scan the index for multikey metadata path - * entries and remove any path encountered. As we expect the index to contain a super-set of - * the collection paths, a non-empty set represents an invalid index. 
- */
-    void addMultikeyMetadataPath(const KeyString::Value& ks, IndexInfo* indexInfo);
-    void removeMultikeyMetadataPath(const KeyString::Value& ks, IndexInfo* indexInfo);
-    size_t getMultikeyMetadataPathCount(IndexInfo* indexInfo);
+protected:
+    struct IndexKeyBucket {
+        uint32_t indexKeyCount;
+        uint32_t bucketSizeBytes;
+    };
+
+    CollectionValidation::ValidateState* _validateState;
+
+    // We map the hashed KeyString values to a bucket that contains the count of how many
+    // index keys and document keys we've seen in each bucket. This counter is unsigned to avoid
+    // undefined behavior in the (unlikely) case of overflow.
+    // Count rules:
+    //     - If the count is non-zero for a bucket after all documents and index entries have been
+    //       processed, one or more indexes are inconsistent for KeyStrings that map to it.
+    //       Otherwise, those keys are consistent for all indexes with a high degree of confidence.
+    //     - Absent overflow, if a count interpreted as a two's complement integer ends up greater
+    //       than zero, there are too few index entries.
+    //     - Similarly, if that count ends up less than zero, there are too many index entries.
+
+    std::vector<IndexKeyBucket> _indexKeyBuckets;
+
+    // Whether we're in the first or second phase of index validation.
+    bool _firstPhase;
+
+private:
+    IndexConsistency() = delete;
+};  // IndexConsistency
+
+/**
+ * The KeyStringIndexConsistency class is used to keep track of the index consistency for
+ * KeyString based indexes. It does this by using the index keys from index entries and index keys
+ * generated from the document to ensure there is a one-to-one mapping for each key. In addition, an
+ * IndexObserver class can be hooked into the IndexAccessMethod to inform this class about changes
+ * to the indexes during a validation and compensate for them.
+ */
+class KeyStringIndexConsistency final : protected IndexConsistency {
+    using IndexInfoMap = std::map<std::string, IndexInfo>;
+    using IndexKey = std::pair<std::string, std::string>;
+
+public:
+    KeyStringIndexConsistency(OperationContext* opCtx,
+                              CollectionValidation::ValidateState* validateState,
+                              size_t numHashBuckets = kNumHashBuckets);
+
+    void setSecondPhase() {
+        IndexConsistency::setSecondPhase();
+    }

     /**
-     * Returns true if any value in the `_indexKeyCount` map is not equal to 0, otherwise
-     * return false.
+     * Traverses the KeyString-based index via its cursor and accumulates the traversal results.
      */
-    bool haveEntryMismatch() const;
+    int64_t traverseIndex(OperationContext* opCtx,
+                          const IndexCatalogEntry* index,
+                          ProgressMeterHolder& _progress,
+                          ValidateResults* results);

     /**
-     * Return info on all indexes tracked by this.
+     * Generates the index keys for a single row-store record, given as a {'recordId','record'}
+     * pair, and accumulates the traversal results.
      */
-    IndexInfoMap& getIndexInfo() {
-        return _indexesInfo;
-    }
-    IndexInfo& getIndexInfo(const std::string& indexName) {
-        return _indexesInfo.at(indexName);
-    }
+    void traverseRecord(OperationContext* opCtx,
+                        const CollectionPtr& coll,
+                        const IndexCatalogEntry* index,
+                        const RecordId& recordId,
+                        const BSONObj& recordBson,
+                        ValidateResults* results);

     /**
-     * Informs the IndexConsistency object that we're advancing to the second phase of index
-     * validation.
+     * Returns true if any bucket in `_indexKeyBuckets` has a non-zero count, otherwise returns
+     * false.
      */
-    void setSecondPhase();
+    bool haveEntryMismatch() const;

     /**
      * If repair mode enabled, try inserting _missingIndexEntries into indexes.
*/ - void repairMissingIndexEntries(OperationContext* opCtx, ValidateResults* results); + void repairIndexEntries(OperationContext* opCtx, ValidateResults* results); /** * Records the errors gathered from the second phase of index validation into the provided * ValidateResultsMap and ValidateResults. */ - void addIndexEntryErrors(ValidateResults* results); + void addIndexEntryErrors(OperationContext* opCtx, ValidateResults* results); /** * Sets up this IndexConsistency object to limit memory usage in the second phase of index @@ -177,35 +203,21 @@ public: */ bool limitMemoryUsageForSecondPhase(ValidateResults* result); -private: - struct IndexKeyBucket { - uint32_t indexKeyCount; - uint32_t bucketSizeBytes; - }; - - IndexConsistency() = delete; + void validateIndexKeyCount(OperationContext* opCtx, + const IndexCatalogEntry* index, + long long* numRecords, + IndexValidateResults& results); - CollectionValidation::ValidateState* _validateState; - - // We map the hashed KeyString values to a bucket that contains the count of how many - // index keys and document keys we've seen in each bucket. This counter is unsigned to avoid - // undefined behavior in the (unlikely) case of overflow. - // Count rules: - // - If the count is non-zero for a bucket after all documents and index entries have been - // processed, one or more indexes are inconsistent for KeyStrings that map to it. - // Otherwise, those keys are consistent for all indexes with a high degree of confidence. - // - Absent overflow, if a count interpreted as twos complement integer ends up greater - // than zero, there are too few index entries. - // - Similarly, if that count ends up less than zero, there are too many index entries. + uint64_t getTotalIndexKeys() { + return _totalIndexKeys; + } - std::vector<IndexKeyBucket> _indexKeyBuckets; +private: + KeyStringIndexConsistency() = delete; // A vector of IndexInfo indexes by index number IndexInfoMap _indexesInfo; - // Whether we're in the first or second phase of index validation. - bool _firstPhase; - // Populated during the second phase of validation, this map contains the index entries that // were pointing at an invalid document key. // The map contains a IndexKey pointing at a set of BSON objects as there may be multiple @@ -218,6 +230,55 @@ private: // index entry for a given IndexKey for each index. std::map<IndexKey, IndexEntryInfo> _missingIndexEntries; + // The total number of index keys is stored during the first validation phase, since this + // count may change during a second phase. + uint64_t _totalIndexKeys = 0; + + /** + * Return info for an index tracked by this with the given 'indexName'. + */ + IndexInfo& getIndexInfo(const std::string& indexName) { + return _indexesInfo.at(indexName); + } + + /** + * During the first phase of validation, given the document's key KeyString, increment the + * corresponding `_indexKeyCount` by hashing it. + * For the second phase of validation, keep track of the document keys that hashed to + * inconsistent hash buckets during the first phase of validation. + */ + void addDocKey(OperationContext* opCtx, + const KeyString::Value& ks, + IndexInfo* indexInfo, + const RecordId& recordId); + + /** + * During the first phase of validation, given the index entry's KeyString, decrement the + * corresponding `_indexKeyCount` by hashing it. + * For the second phase of validation, try to match the index entry keys that hashed to + * inconsistent hash buckets during the first phase of validation to document keys. 
+ */ + void addIndexKey(OperationContext* opCtx, + const KeyString::Value& ks, + IndexInfo* indexInfo, + const RecordId& recordId, + ValidateResults* results); + + /** + * During the first phase of validation, tracks the multikey paths for every observed document. + */ + void addDocumentMultikeyPaths(IndexInfo* indexInfo, const MultikeyPaths& multikeyPaths); + + /** + * To validate $** multikey metadata paths, we first scan the collection and add a hash of all + * multikey paths encountered to a set. We then scan the index for multikey metadata path + * entries and remove any path encountered. As we expect the index to contain a super-set of + * the collection paths, a non-empty set represents an invalid index. + */ + void addMultikeyMetadataPath(const KeyString::Value& ks, IndexInfo* indexInfo); + void removeMultikeyMetadataPath(const KeyString::Value& ks, IndexInfo* indexInfo); + size_t getMultikeyMetadataPathCount(IndexInfo* indexInfo); + /** * Generates a key for the second phase of validation. The keys format is the following: * { @@ -241,5 +302,5 @@ private: */ uint32_t _hashKeyString(const KeyString::Value& ks, uint32_t indexNameHash) const; -}; // IndexConsistency +}; // KeyStringIndexConsistency } // namespace mongo diff --git a/src/mongo/db/catalog/validate_adaptor.cpp b/src/mongo/db/catalog/validate_adaptor.cpp index 64e2f3fe151..035f5be1eb2 100644 --- a/src/mongo/db/catalog/validate_adaptor.cpp +++ b/src/mongo/db/catalog/validate_adaptor.cpp @@ -39,11 +39,13 @@ #include "mongo/bson/util/bsoncolumn.h" #include "mongo/db/catalog/clustered_collection_util.h" #include "mongo/db/catalog/collection.h" +#include "mongo/db/catalog/column_index_consistency.h" #include "mongo/db/catalog/index_catalog.h" #include "mongo/db/catalog/index_consistency.h" #include "mongo/db/catalog/throttle_cursor.h" #include "mongo/db/concurrency/exception_util.h" #include "mongo/db/curop.h" +#include "mongo/db/index/columns_access_method.h" #include "mongo/db/index/index_access_method.h" #include "mongo/db/index/index_descriptor.h" #include "mongo/db/index/wildcard_access_method.h" @@ -71,11 +73,8 @@ namespace mongo { namespace { -MONGO_FAIL_POINT_DEFINE(crashOnMultikeyValidateFailure); - // Set limit for size of corrupted records that will be reported. const long long kMaxErrorSizeBytes = 1 * 1024 * 1024; -const long long kInterruptIntervalNumRecords = 4096; const long long kInterruptIntervalNumBytes = 50 * 1024 * 1024; // 50MB. 
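// [Editorial note, not part of the patch: kInterruptIntervalNumRecords and the
// crashOnMultikeyValidateFailure fail point are deleted from this anonymous namespace
// because they moved into index_consistency.{h,cpp} (see the earlier hunks), along with
// the traversal loops that consult them.]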
static constexpr const char* kSchemaValidationFailedReason = @@ -575,373 +574,19 @@ Status ValidateAdaptor::validateRecord(OperationContext* opCtx, results); } - auto& executionCtx = StorageExecutionContext::get(opCtx); SharedBufferFragmentBuilder pool(KeyString::HeapBuilder::kHeapAllocatorDefaultBytes); for (const auto& index : _validateState->getIndexes()) { const IndexDescriptor* descriptor = index->descriptor(); - auto iam = index->accessMethod()->asSortedData(); - if (descriptor->isPartial() && !index->getFilterExpression()->matchesBSON(recordBson)) continue; - auto documentKeySet = executionCtx.keys(); - auto multikeyMetadataKeys = executionCtx.multikeyMetadataKeys(); - auto documentMultikeyPaths = executionCtx.multikeyPaths(); - - iam->getKeys(opCtx, - coll, - pool, - recordBson, - InsertDeleteOptions::ConstraintEnforcementMode::kEnforceConstraints, - SortedDataIndexAccessMethod::GetKeysContext::kAddingKeys, - documentKeySet.get(), - multikeyMetadataKeys.get(), - documentMultikeyPaths.get(), - recordId); - - bool shouldBeMultikey = iam->shouldMarkIndexAsMultikey( - documentKeySet->size(), - {multikeyMetadataKeys->begin(), multikeyMetadataKeys->end()}, - *documentMultikeyPaths); - - if (!index->isMultikey(opCtx, coll) && shouldBeMultikey) { - if (_validateState->fixErrors()) { - writeConflictRetry(opCtx, "setIndexAsMultikey", coll->ns().ns(), [&] { - WriteUnitOfWork wuow(opCtx); - coll->getIndexCatalog()->setMultikeyPaths( - opCtx, coll, descriptor, *multikeyMetadataKeys, *documentMultikeyPaths); - wuow.commit(); - }); - - LOGV2(4614700, - "Index set to multikey", - "indexName"_attr = descriptor->indexName(), - "collection"_attr = coll->ns().ns()); - results->warnings.push_back(str::stream() << "Index " << descriptor->indexName() - << " set to multikey."); - results->repaired = true; - } else { - auto& curRecordResults = (results->indexResultsMap)[descriptor->indexName()]; - std::string msg = fmt::format( - "Index {} is not multikey but has more than one key in document with " - "RecordId({}) and {}", - descriptor->indexName(), - recordId.toString(), - recordBson.getField("_id").toString()); - curRecordResults.errors.push_back(msg); - curRecordResults.valid = false; - if (crashOnMultikeyValidateFailure.shouldFail()) { - invariant(false, msg); - } - } - } - - if (index->isMultikey(opCtx, coll)) { - const MultikeyPaths& indexPaths = index->getMultikeyPaths(opCtx, coll); - if (!MultikeyPathTracker::covers(indexPaths, *documentMultikeyPaths.get())) { - if (_validateState->fixErrors()) { - writeConflictRetry(opCtx, "increaseMultikeyPathCoverage", coll->ns().ns(), [&] { - WriteUnitOfWork wuow(opCtx); - coll->getIndexCatalog()->setMultikeyPaths( - opCtx, coll, descriptor, *multikeyMetadataKeys, *documentMultikeyPaths); - wuow.commit(); - }); - - LOGV2(4614701, - "Multikey paths updated to cover multikey document", - "indexName"_attr = descriptor->indexName(), - "collection"_attr = coll->ns().ns()); - results->warnings.push_back(str::stream() << "Index " << descriptor->indexName() - << " multikey paths updated."); - results->repaired = true; - } else { - std::string msg = fmt::format( - "Index {} multikey paths do not cover a document with RecordId({}) and {}", - descriptor->indexName(), - recordId.toString(), - recordBson.getField("_id").toString()); - auto& curRecordResults = (results->indexResultsMap)[descriptor->indexName()]; - curRecordResults.errors.push_back(msg); - curRecordResults.valid = false; - } - } - } - - IndexInfo& indexInfo = 
_indexConsistency->getIndexInfo(descriptor->indexName()); - if (shouldBeMultikey) { - indexInfo.multikeyDocs = true; - } - - // An empty set of multikey paths indicates that this index does not track path-level - // multikey information and we should do no tracking. - if (shouldBeMultikey && documentMultikeyPaths->size()) { - _indexConsistency->addDocumentMultikeyPaths(&indexInfo, *documentMultikeyPaths); - } - - for (const auto& keyString : *multikeyMetadataKeys) { - try { - _indexConsistency->addMultikeyMetadataPath(keyString, &indexInfo); - } catch (...) { - return exceptionToStatus(); - } - } - for (const auto& keyString : *documentKeySet) { - try { - _totalIndexKeys++; - _indexConsistency->addDocKey(opCtx, keyString, &indexInfo, recordId); - } catch (...) { - return exceptionToStatus(); - } - } + this->traverseRecord(opCtx, coll, index.get(), recordId, recordBson, results); } return Status::OK(); } -namespace { -// Ensures that index entries are in increasing or decreasing order. -void _validateKeyOrder(OperationContext* opCtx, - const IndexCatalogEntry* index, - const KeyString::Value& currKey, - const KeyString::Value& prevKey, - IndexValidateResults* results) { - auto descriptor = index->descriptor(); - bool unique = descriptor->unique(); - - // KeyStrings will be in strictly increasing order because all keys are sorted and they are in - // the format (Key, RID), and all RecordIDs are unique. - if (currKey.compare(prevKey) <= 0) { - if (results && results->valid) { - results->errors.push_back(str::stream() - << "index '" << descriptor->indexName() - << "' is not in strictly ascending or descending order"); - } - if (results) { - results->valid = false; - } - return; - } - - if (unique) { - // Unique indexes must not have duplicate keys. - int cmp = currKey.compareWithoutRecordIdLong(prevKey); - if (cmp != 0) { - return; - } - - if (results && results->valid) { - auto bsonKey = KeyString::toBson(currKey, Ordering::make(descriptor->keyPattern())); - auto firstRecordId = - KeyString::decodeRecordIdLongAtEnd(prevKey.getBuffer(), prevKey.getSize()); - auto secondRecordId = - KeyString::decodeRecordIdLongAtEnd(currKey.getBuffer(), currKey.getSize()); - results->errors.push_back(str::stream() << "Unique index '" << descriptor->indexName() - << "' has duplicate key: " << bsonKey - << ", first record: " << firstRecordId - << ", second record: " << secondRecordId); - } - if (results) { - results->valid = false; - } - } -} -} // namespace - -void ValidateAdaptor::traverseIndex(OperationContext* opCtx, - const IndexCatalogEntry* index, - int64_t* numTraversedKeys, - ValidateResults* results) { - const IndexDescriptor* descriptor = index->descriptor(); - auto indexName = descriptor->indexName(); - auto& indexResults = results->indexResultsMap[indexName]; - IndexInfo& indexInfo = _indexConsistency->getIndexInfo(indexName); - int64_t numKeys = 0; - - bool isFirstEntry = true; - - // The progress meter will be inactive after traversing the record store to allow the message - // and the total to be set to different values. 
- if (!_progress.get(WithLock::withoutLock())->isActive()) { - const char* curopMessage = "Validate: scanning index entries"; - stdx::unique_lock<Client> lk(*opCtx->getClient()); - _progress.set( - lk, CurOp::get(opCtx)->setProgress_inlock(curopMessage, _totalIndexKeys), opCtx); - } - - const KeyString::Version version = - index->accessMethod()->asSortedData()->getSortedDataInterface()->getKeyStringVersion(); - - KeyString::Builder firstKeyStringBuilder( - version, BSONObj(), indexInfo.ord, KeyString::Discriminator::kExclusiveBefore); - KeyString::Value firstKeyString = firstKeyStringBuilder.getValueCopy(); - KeyString::Value prevIndexKeyStringValue; - - // Ensure that this index has an open index cursor. - const auto indexCursorIt = _validateState->getIndexCursors().find(indexName); - invariant(indexCursorIt != _validateState->getIndexCursors().end()); - - const std::unique_ptr<SortedDataInterfaceThrottleCursor>& indexCursor = indexCursorIt->second; - - boost::optional<KeyStringEntry> indexEntry; - try { - indexEntry = indexCursor->seekForKeyString(opCtx, firstKeyString); - } catch (const DBException& ex) { - if (TestingProctor::instance().isEnabled() && ex.code() != ErrorCodes::WriteConflict) { - LOGV2_FATAL(5318400, - "Error seeking to first key", - "error"_attr = ex.toString(), - "index"_attr = indexName, - "key"_attr = firstKeyString.toString()); - } - throw; - } - - const auto keyFormat = - index->accessMethod()->asSortedData()->getSortedDataInterface()->rsKeyFormat(); - const RecordId kWildcardMultikeyMetadataRecordId = record_id_helpers::reservedIdFor( - record_id_helpers::ReservationId::kWildcardMultikeyMetadataId, keyFormat); - - // Warn about unique indexes with keys in old format (without record id). - bool foundOldUniqueIndexKeys = false; - - while (indexEntry) { - if (!isFirstEntry) { - _validateKeyOrder( - opCtx, index, indexEntry->keyString, prevIndexKeyStringValue, &indexResults); - } - - if (!foundOldUniqueIndexKeys && !descriptor->isIdIndex() && descriptor->unique() && - !indexCursor->isRecordIdAtEndOfKeyString()) { - results->warnings.push_back( - fmt::format("Unique index {} has one or more keys in the old format (without " - "embedded record id). First record: {}", - indexInfo.indexName, - indexEntry->loc.toString())); - foundOldUniqueIndexKeys = true; - } - - bool isMetadataKey = indexEntry->loc == kWildcardMultikeyMetadataRecordId; - if (descriptor->getIndexType() == IndexType::INDEX_WILDCARD && isMetadataKey) { - _indexConsistency->removeMultikeyMetadataPath(indexEntry->keyString, &indexInfo); - } else { - try { - _indexConsistency->addIndexKey( - opCtx, indexEntry->keyString, &indexInfo, indexEntry->loc, results); - } catch (const DBException& e) { - StringBuilder ss; - ss << "Parsing index key for " << indexInfo.indexName << " recId " - << indexEntry->loc << " threw exception " << e.toString(); - results->errors.push_back(ss.str()); - results->valid = false; - } - } - { - stdx::unique_lock<Client> lk(*opCtx->getClient()); - _progress.get(lk)->hit(); - } - numKeys++; - isFirstEntry = false; - prevIndexKeyStringValue = indexEntry->keyString; - - if (numKeys % kInterruptIntervalNumRecords == 0) { - // Periodically checks for interrupts and yields. 
- opCtx->checkForInterrupt(); - _validateState->yield(opCtx); - } - - try { - indexEntry = indexCursor->nextKeyString(opCtx); - } catch (const DBException& ex) { - if (TestingProctor::instance().isEnabled() && ex.code() != ErrorCodes::WriteConflict) { - LOGV2_FATAL(5318401, - "Error advancing index cursor", - "error"_attr = ex.toString(), - "index"_attr = indexName, - "prevKey"_attr = prevIndexKeyStringValue.toString()); - } - throw; - } - } - - if (results && _indexConsistency->getMultikeyMetadataPathCount(&indexInfo) > 0) { - results->errors.push_back(str::stream() - << "Index '" << descriptor->indexName() - << "' has one or more missing multikey metadata index keys"); - results->valid = false; - } - - // Adjust multikey metadata when allowed. These states are all allowed by the design of - // multikey. A collection should still be valid without these adjustments. - if (_validateState->adjustMultikey()) { - - // If this collection has documents that make this index multikey, then check whether those - // multikey paths match the index's metadata. - auto indexPaths = index->getMultikeyPaths(opCtx, _validateState->getCollection()); - auto& documentPaths = indexInfo.docMultikeyPaths; - if (indexInfo.multikeyDocs && documentPaths != indexPaths) { - LOGV2(5367500, - "Index's multikey paths do not match those of its documents", - "index"_attr = descriptor->indexName(), - "indexPaths"_attr = MultikeyPathTracker::dumpMultikeyPaths(indexPaths), - "documentPaths"_attr = MultikeyPathTracker::dumpMultikeyPaths(documentPaths)); - - // Since we have the correct multikey path information for this index, we can tighten up - // its metadata to improve query performance. This may apply in two distinct scenarios: - // 1. Collection data has changed such that the current multikey paths on the index - // are too permissive and certain document paths are no longer multikey. - // 2. This index was built before 3.4, and there is no multikey path information for - // the index. We can effectively 'upgrade' the index so that it does not need to be - // rebuilt to update this information. - writeConflictRetry(opCtx, "updateMultikeyPaths", _validateState->nss().ns(), [&]() { - WriteUnitOfWork wuow(opCtx); - auto writeableIndex = const_cast<IndexCatalogEntry*>(index); - const bool isMultikey = true; - writeableIndex->forceSetMultikey( - opCtx, _validateState->getCollection(), isMultikey, documentPaths); - wuow.commit(); - }); - - if (results) { - results->warnings.push_back(str::stream() << "Updated index multikey metadata" - << ": " << descriptor->indexName()); - results->repaired = true; - } - } - - // If this index does not need to be multikey, then unset the flag. - if (index->isMultikey(opCtx, _validateState->getCollection()) && !indexInfo.multikeyDocs) { - invariant(!indexInfo.docMultikeyPaths.size()); - - LOGV2(5367501, - "Index is multikey but there are no multikey documents", - "index"_attr = descriptor->indexName()); - - // This makes an improvement in the case that no documents make the index multikey and - // the flag can be unset entirely. This may be due to a change in the data or historical - // multikey bugs that have persisted incorrect multikey infomation. 
- writeConflictRetry(opCtx, "unsetMultikeyPaths", _validateState->nss().ns(), [&]() { - WriteUnitOfWork wuow(opCtx); - auto writeableIndex = const_cast<IndexCatalogEntry*>(index); - const bool isMultikey = false; - writeableIndex->forceSetMultikey( - opCtx, _validateState->getCollection(), isMultikey, {}); - wuow.commit(); - }); - - if (results) { - results->warnings.push_back(str::stream() << "Unset index multikey metadata" - << ": " << descriptor->indexName()); - results->repaired = true; - } - } - } - - if (numTraversedKeys) { - *numTraversedKeys = numKeys; - } -} - void ValidateAdaptor::traverseRecordStore(OperationContext* opCtx, ValidateResults* results, BSONObjBuilder* output) { @@ -1093,7 +738,7 @@ void ValidateAdaptor::traverseRecordStore(OperationContext* opCtx, prevRecordId = record->id; - if (_numRecords % kInterruptIntervalNumRecords == 0 || + if (_numRecords % IndexConsistency::kInterruptIntervalNumRecords == 0 || interruptIntervalNumBytes >= kInterruptIntervalNumBytes) { // Periodically checks for interrupts and yields. opCtx->checkForInterrupt(); @@ -1125,78 +770,90 @@ void ValidateAdaptor::traverseRecordStore(OperationContext* opCtx, } } +bool isColumnStoreIndex(const IndexCatalogEntry* index) { + return index->descriptor()->getAccessMethodName() == IndexNames::COLUMN; +} + void ValidateAdaptor::validateIndexKeyCount(OperationContext* opCtx, const IndexCatalogEntry* index, IndexValidateResults& results) { - // Fetch the total number of index entries we previously found traversing the index. - const IndexDescriptor* desc = index->descriptor(); - const std::string indexName = desc->indexName(); - IndexInfo* indexInfo = &_indexConsistency->getIndexInfo(indexName); - auto numTotalKeys = indexInfo->numKeys; - - // Update numRecords by subtracting number of records removed from record store in repair mode - // when validating index consistency - _numRecords -= results.keysRemovedFromRecordStore; - - // Do not fail on finding too few index entries compared to collection entries when full:false. - bool hasTooFewKeys = false; - bool noErrorOnTooFewKeys = !_validateState->isFullIndexValidation(); - - if (desc->isIdIndex() && numTotalKeys != _numRecords) { - hasTooFewKeys = (numTotalKeys < _numRecords); - std::string msg = str::stream() - << "number of _id index entries (" << numTotalKeys - << ") does not match the number of documents in the index (" << _numRecords << ")"; - if (noErrorOnTooFewKeys && (numTotalKeys < _numRecords)) { - results.warnings.push_back(msg); - } else { - results.errors.push_back(msg); - results.valid = false; - } + if (isColumnStoreIndex(index)) { + _columnIndexConsistency.validateIndexKeyCount(opCtx, index, &_numRecords, results); + } else { + _keyBasedIndexConsistency.validateIndexKeyCount(opCtx, index, &_numRecords, results); } +} - // Hashed indexes may never be multikey. - if (desc->getAccessMethodName() == IndexNames::HASHED && - index->isMultikey(opCtx, _validateState->getCollection())) { - results.errors.push_back(str::stream() << "Hashed index is incorrectly marked multikey: " - << desc->indexName()); - results.valid = false; +void ValidateAdaptor::traverseIndex(OperationContext* opCtx, + const IndexCatalogEntry* index, + int64_t* numTraversedKeys, + ValidateResults* results) { + // The progress meter will be inactive after traversing the record store to allow the message + // and the total to be set to different values. 
+ if (!_progress.get(WithLock::withoutLock())->isActive()) { + const char* curopMessage = "Validate: scanning index entries"; + stdx::unique_lock<Client> lk(*opCtx->getClient()); + _progress.set(lk, + CurOp::get(opCtx)->setProgress_inlock( + curopMessage, + isColumnStoreIndex(index) + ? _columnIndexConsistency.getTotalIndexKeys() + : _keyBasedIndexConsistency.getTotalIndexKeys()), + opCtx); } - // Confirm that the number of index entries is not greater than the number of documents in the - // collection. This check is only valid for indexes that are not multikey (indexed arrays - // produce an index key per array entry) and not $** indexes which can produce index keys for - // multiple paths within a single document. - if (results.valid && !index->isMultikey(opCtx, _validateState->getCollection()) && - desc->getIndexType() != IndexType::INDEX_WILDCARD && numTotalKeys > _numRecords) { - std::string err = str::stream() - << "index " << desc->indexName() << " is not multi-key, but has more entries (" - << numTotalKeys << ") than documents in the index (" << _numRecords << ")"; - results.errors.push_back(err); - results.valid = false; + int64_t numKeys = 0; + if (isColumnStoreIndex(index)) { + numKeys += _columnIndexConsistency.traverseIndex(opCtx, index, _progress, results); + } else { + numKeys += _keyBasedIndexConsistency.traverseIndex(opCtx, index, _progress, results); } - // Ignore any indexes with a special access method. If an access method name is given, the - // index may be a full text, geo or special index plugin with different semantics. - if (results.valid && !desc->isSparse() && !desc->isPartial() && !desc->isIdIndex() && - desc->getAccessMethodName() == "" && numTotalKeys < _numRecords) { - hasTooFewKeys = true; - std::string msg = str::stream() - << "index " << desc->indexName() << " is not sparse or partial, but has fewer entries (" - << numTotalKeys << ") than documents in the index (" << _numRecords << ")"; - if (noErrorOnTooFewKeys) { - results.warnings.push_back(msg); - } else { - results.errors.push_back(msg); - results.valid = false; - } + if (numTraversedKeys) { + *numTraversedKeys = numKeys; } +} - if (!_validateState->isFullIndexValidation() && hasTooFewKeys) { - std::string warning = str::stream() - << "index " << desc->indexName() << " has fewer keys than records." 
- << " Please re-run the validate command with {full: true}"; - results.warnings.push_back(warning); +void ValidateAdaptor::traverseRecord(OperationContext* opCtx, + const CollectionPtr& coll, + const IndexCatalogEntry* index, + const RecordId& recordId, + const BSONObj& record, + ValidateResults* results) { + if (isColumnStoreIndex(index)) { + _columnIndexConsistency.traverseRecord(opCtx, coll, index, recordId, record, results); + } else { + _keyBasedIndexConsistency.traverseRecord(opCtx, coll, index, recordId, record, results); } } + +void ValidateAdaptor::setSecondPhase() { + _columnIndexConsistency.setSecondPhase(); + _keyBasedIndexConsistency.setSecondPhase(); +} + +bool ValidateAdaptor::limitMemoryUsageForSecondPhase(ValidateResults* result) { + bool retVal = true; + retVal &= _columnIndexConsistency.limitMemoryUsageForSecondPhase(result); + retVal &= _keyBasedIndexConsistency.limitMemoryUsageForSecondPhase(result); + return retVal; +} + +bool ValidateAdaptor::haveEntryMismatch() const { + bool retVal = false; + retVal |= _columnIndexConsistency.haveEntryMismatch(); + retVal |= _keyBasedIndexConsistency.haveEntryMismatch(); + return retVal; +} + +void ValidateAdaptor::repairIndexEntries(OperationContext* opCtx, ValidateResults* results) { + _columnIndexConsistency.repairIndexEntries(opCtx, results); + _keyBasedIndexConsistency.repairIndexEntries(opCtx, results); +} + +void ValidateAdaptor::addIndexEntryErrors(OperationContext* opCtx, ValidateResults* results) { + _columnIndexConsistency.addIndexEntryErrors(opCtx, results); + _keyBasedIndexConsistency.addIndexEntryErrors(opCtx, results); +} + } // namespace mongo diff --git a/src/mongo/db/catalog/validate_adaptor.h b/src/mongo/db/catalog/validate_adaptor.h index 3217c7b5824..07d05164623 100644 --- a/src/mongo/db/catalog/validate_adaptor.h +++ b/src/mongo/db/catalog/validate_adaptor.h @@ -29,12 +29,15 @@ #pragma once +#include <array> + +#include "mongo/db/catalog/column_index_consistency.h" +#include "mongo/db/catalog/index_consistency.h" #include "mongo/db/catalog/validate_state.h" #include "mongo/util/progress_meter.h" namespace mongo { -class IndexConsistency; class IndexDescriptor; class OperationContext; @@ -44,10 +47,11 @@ class OperationContext; */ class ValidateAdaptor { public: - ValidateAdaptor(IndexConsistency* indexConsistency, - CollectionValidation::ValidateState* validateState) + ValidateAdaptor(OperationContext* opCtx, CollectionValidation::ValidateState* validateState) - : _indexConsistency(indexConsistency), _validateState(validateState) {} + : _keyBasedIndexConsistency(opCtx, validateState), + _columnIndexConsistency(opCtx, validateState), + _validateState(validateState) {} /** * Validates the record data and traverses through its key set to keep track of the @@ -59,6 +63,13 @@ public: long long* nNonCompliantDocuments, size_t* dataSize, ValidateResults* results); + /** + * Traverses the record store to retrieve every record and go through its document key + * set to keep track of the index consistency during a validation. + */ + void traverseRecordStore(OperationContext* opCtx, + ValidateResults* results, + BSONObjBuilder* output); /** * Traverses the index getting index entries to validate them and keep track of the index keys @@ -70,12 +81,14 @@ public: ValidateResults* results); /** - * Traverses the record store to retrieve every record and go through its document key - * set to keep track of the index consistency during a validation. + * Traverses a record on the underlying index consistency objects. 
 */
-    void traverseRecordStore(OperationContext* opCtx,
-                             ValidateResults* results,
-                             BSONObjBuilder* output);
+    void traverseRecord(OperationContext* opCtx,
+                        const CollectionPtr& coll,
+                        const IndexCatalogEntry* index,
+                        const RecordId& recordId,
+                        const BSONObj& record,
+                        ValidateResults* results);

    /**
     * Validates that the number of document keys matches the number of index keys previously
@@ -85,8 +98,38 @@ public:
                             const IndexCatalogEntry* index,
                             IndexValidateResults& results);

+    /**
+     * Informs the index consistency objects that we're advancing to the second phase of index
+     * validation.
+     */
+    void setSecondPhase();
+
+    /**
+     * Sets up the index consistency objects to limit memory usage in the second phase of index
+     * validation. Returns whether the memory limit is sufficient to report at least one index entry
+     * inconsistency and continue with the second phase of validation.
+     */
+    bool limitMemoryUsageForSecondPhase(ValidateResults* result);
+
+    /**
+     * Returns true if the underlying index consistency objects have entry mismatches.
+     */
+    bool haveEntryMismatch() const;
+
+    /**
+     * If repair mode is enabled, try inserting _missingIndexEntries into indexes.
+     */
+    void repairIndexEntries(OperationContext* opCtx, ValidateResults* results);
+
+    /**
+     * Records the errors gathered from the second phase of index validation into the provided
+     * ValidateResultsMap and ValidateResults.
+     */
+    void addIndexEntryErrors(OperationContext* opCtx, ValidateResults* results);
+
 private:
-    IndexConsistency* _indexConsistency;
+    KeyStringIndexConsistency _keyBasedIndexConsistency;
+    ColumnIndexConsistency _columnIndexConsistency;

    CollectionValidation::ValidateState* _validateState;

    // Saves the record count from the record store traversal to be used later to validate the index
@@ -95,9 +138,5 @@ private:

    // For reporting progress during record store and index traversal.
    ProgressMeterHolder _progress;
-
-    // The total number of index keys is stored during the first validation phase, since this
-    // count may change during a second phase.
- uint64_t _totalIndexKeys = 0; }; } // namespace mongo diff --git a/src/mongo/db/catalog/validate_state.cpp b/src/mongo/db/catalog/validate_state.cpp index 9f0fd873f59..304f664005b 100644 --- a/src/mongo/db/catalog/validate_state.cpp +++ b/src/mongo/db/catalog/validate_state.cpp @@ -38,6 +38,7 @@ #include "mongo/db/catalog/index_consistency.h" #include "mongo/db/catalog/validate_adaptor.h" #include "mongo/db/db_raii.h" +#include "mongo/db/index/columns_access_method.h" #include "mongo/db/index/index_access_method.h" #include "mongo/db/operation_context.h" #include "mongo/db/storage/durable_catalog.h" @@ -192,6 +193,9 @@ void ValidateState::_yieldCursors(OperationContext* opCtx) { for (const auto& indexCursor : _indexCursors) { indexCursor.second->save(); } + for (const auto& indexCursor : _columnStoreIndexCursors) { + indexCursor.second->save(); + } _traverseRecordStoreCursor->save(); _seekRecordStoreCursor->save(); @@ -200,6 +204,9 @@ void ValidateState::_yieldCursors(OperationContext* opCtx) { for (const auto& indexCursor : _indexCursors) { indexCursor.second->restore(); } + for (const auto& indexCursor : _columnStoreIndexCursors) { + indexCursor.second->restore(); + } uassert(ErrorCodes::Interrupted, "Interrupted due to: failure to restore yielded traverse cursor", @@ -209,9 +216,16 @@ void ValidateState::_yieldCursors(OperationContext* opCtx) { _seekRecordStoreCursor->restore()); } +bool ValidateState::_isIndexDataCheckpointed(OperationContext* opCtx, + const IndexCatalogEntry* entry) { + return isBackground() && + opCtx->getServiceContext()->getStorageEngine()->isInIndividuallyCheckpointedIndexes( + entry->getIdent()); +} + void ValidateState::initializeCursors(OperationContext* opCtx) { invariant(!_traverseRecordStoreCursor && !_seekRecordStoreCursor && _indexCursors.size() == 0 && - _indexes.size() == 0); + _columnStoreIndexCursors.size() == 0 && _indexes.size() == 0); // Background validation reads from the last stable checkpoint instead of the latest data. This // allows concurrent writes to go ahead without interfering with validation's view of the data. @@ -292,27 +306,51 @@ void ValidateState::initializeCursors(OperationContext* opCtx) { continue; } - auto iam = entry->accessMethod()->asSortedData(); - if (!iam) - continue; - - _indexCursors.emplace( - desc->indexName(), - std::make_unique<SortedDataInterfaceThrottleCursor>(opCtx, iam, &_dataThrottle)); + if (entry->descriptor()->getAccessMethodName() == IndexNames::COLUMN) { + const auto iam = entry->accessMethod(); + _columnStoreIndexCursors.emplace( + desc->indexName(), + static_cast<ColumnStoreAccessMethod*>(iam)->storage()->newCursor(opCtx)); + + // Skip any newly created indexes that, because they were built with a WT bulk loader, + // are checkpoint'ed but not yet consistent with the rest of checkpoint's PIT view of + // the data. 
+ if (_isIndexDataCheckpointed(opCtx, entry)) { + _columnStoreIndexCursors.erase(desc->indexName()); + LOGV2( + 7106110, + "Skipping background validation on the index because the index data is not yet " + "consistent in the checkpoint.", + "desc_indexName"_attr = desc->indexName(), + "nss"_attr = _nss); + continue; + } + } else { + const auto iam = entry->accessMethod()->asSortedData(); + if (!iam) { + LOGV2(6325100, + "[Debugging] skipping index {index_name} because it isn't SortedData", + "index_name"_attr = desc->indexName()); + continue; + } - // Skip any newly created indexes that, because they were built with a WT bulk loader, - // are checkpoint'ed but not yet consistent with the rest of checkpoint's PIT view of - // the data. - if (isBackground() && - opCtx->getServiceContext()->getStorageEngine()->isInIndividuallyCheckpointedIndexes( - diskIndexIdent)) { - _indexCursors.erase(desc->indexName()); - LOGV2(6868903, - "Skipping background validation on the index because the index data is not yet " - "consistent in the checkpoint.", - "desc_indexName"_attr = desc->indexName(), - "nss"_attr = _nss); - continue; + _indexCursors.emplace( + desc->indexName(), + std::make_unique<SortedDataInterfaceThrottleCursor>(opCtx, iam, &_dataThrottle)); + + // Skip any newly created indexes that, because they were built with a WT bulk loader, + // are checkpoint'ed but not yet consistent with the rest of checkpoint's PIT view of + // the data. + if (_isIndexDataCheckpointed(opCtx, entry)) { + _indexCursors.erase(desc->indexName()); + LOGV2( + 6868903, + "Skipping background validation on the index because the index data is not yet " + "consistent in the checkpoint.", + "desc_indexName"_attr = desc->indexName(), + "nss"_attr = _nss); + continue; + } } _indexes.push_back(indexCatalog->getEntryShared(desc)); diff --git a/src/mongo/db/catalog/validate_state.h b/src/mongo/db/catalog/validate_state.h index c508a6b5850..a5902ca52ff 100644 --- a/src/mongo/db/catalog/validate_state.h +++ b/src/mongo/db/catalog/validate_state.h @@ -148,8 +148,7 @@ public: /** * Map of index names to index cursors. */ - const std::map<std::string, std::unique_ptr<SortedDataInterfaceThrottleCursor>>& - getIndexCursors() const { + const StringMap<std::unique_ptr<SortedDataInterfaceThrottleCursor>>& getIndexCursors() const { return _indexCursors; } @@ -161,6 +160,10 @@ public: return _seekRecordStoreCursor; } + const StringMap<std::unique_ptr<ColumnStore::Cursor>>& getColumnStoreCursors() const { + return _columnStoreIndexCursors; + } + RecordId getFirstRecordId() const { return _firstRecordId; } @@ -227,6 +230,8 @@ private: */ void _yieldCursors(OperationContext* opCtx); + bool _isIndexDataCheckpointed(OperationContext* opCtx, const IndexCatalogEntry* entry); + NamespaceString _nss; ValidateMode _mode; RepairMode _repairMode; @@ -253,9 +258,10 @@ private: std::vector<std::shared_ptr<const IndexCatalogEntry>> _indexes; // Shared cursors to be used during validation, created in 'initializeCursors()'. 
- std::map<std::string, std::unique_ptr<SortedDataInterfaceThrottleCursor>> _indexCursors; + StringMap<std::unique_ptr<SortedDataInterfaceThrottleCursor>> _indexCursors; std::unique_ptr<SeekableRecordThrottleCursor> _traverseRecordStoreCursor; std::unique_ptr<SeekableRecordThrottleCursor> _seekRecordStoreCursor; + StringMap<std::unique_ptr<ColumnStore::Cursor>> _columnStoreIndexCursors; RecordId _firstRecordId; diff --git a/src/mongo/db/index/columns_access_method.cpp b/src/mongo/db/index/columns_access_method.cpp index 3aa913a0f19..96af1ddfd50 100644 --- a/src/mongo/db/index/columns_access_method.cpp +++ b/src/mongo/db/index/columns_access_method.cpp @@ -265,7 +265,6 @@ Status ColumnStoreAccessMethod::BulkBuilder::keyCommitted( return Status::OK(); } - void ColumnStoreAccessMethod::_visitCellsForIndexInsert( OperationContext* opCtx, PooledFragmentBuilder& buf, @@ -375,6 +374,9 @@ Status ColumnStoreAccessMethod::update(OperationContext* opCtx, [&](column_keygen::ColumnKeyGenerator::DiffAction diffAction, StringData path, const column_keygen::UnencodedCellView* cell) { + // TODO SERVER-72351: This is a temporary fix for SERVER-72351. + ON_BLOCK_EXIT([&]() { columnKeys->clear(); }); + if (diffAction == column_keygen::ColumnKeyGenerator::DiffAction::kDelete) { columnKeys->emplace_back(std::make_tuple(path.toString(), "", rid)); int64_t removed = 0; diff --git a/src/mongo/db/index/columns_access_method.h b/src/mongo/db/index/columns_access_method.h index 8d67306fdd7..cbb195c52fa 100644 --- a/src/mongo/db/index/columns_access_method.h +++ b/src/mongo/db/index/columns_access_method.h @@ -48,6 +48,10 @@ public: ColumnStoreAccessMethod(IndexCatalogEntry* ice, std::unique_ptr<ColumnStore>); + const column_keygen::ColumnKeyGenerator& getKeyGen() const { + return _keyGen; + } + /** * Returns a pointer to the ColumnstoreProjection owned by the underlying ColumnKeyGenerator. */ @@ -123,8 +127,16 @@ public: return _store.get(); } + ColumnStore* writableStorage() const { + return _store.get(); + } + class BulkBuilder; + const std::string& indexName() const { + return _descriptor->indexName(); + } + /** * Returns true iff 'compressor' is a recognized name of a block compression module that is * supported for use with the column store index. diff --git a/src/mongo/db/storage/column_store.h b/src/mongo/db/storage/column_store.h index 3886054a022..d22b8386dc6 100644 --- a/src/mongo/db/storage/column_store.h +++ b/src/mongo/db/storage/column_store.h @@ -53,6 +53,18 @@ struct FullCellView { CellView value; }; +/** + * An owned representation of a column-store index entry cell. + */ +struct FullCellValue { + PathValue path; + RowId rid; + CellValue value; + + FullCellValue(FullCellView fcv) : path(fcv.path), rid(fcv.rid), value(fcv.value) {} + FullCellValue(PathView p, RowId r, CellView v) : path(p), rid(r), value(v) {} +}; + struct CellViewForPath { RowId rid; CellView value; @@ -66,10 +78,8 @@ struct CellViewForPath { * stores tuples of Path, RecordId and Value in a separate format. */ class ColumnStore { -protected: - class Cursor; - public: + class Cursor; class WriteCursor { public: virtual ~WriteCursor() = default; @@ -376,7 +386,10 @@ public: _ident = std::move(newIdent); } -protected: + /** + * The Cursor class is for raw access to the index. Except for unusual use cases (e.g., index + * validation) you'll want to use CursorForPath instead. 
+     */
     class Cursor {
     public:
         virtual ~Cursor() = default;
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_column_store.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_column_store.cpp
index aaaf5c27041..263d0bcbf67 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_column_store.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_column_store.cpp
@@ -270,8 +270,11 @@ public:
         if (!_eof && !_lastMoveSkippedKey) {
             WT_ITEM key;
             WT_CURSOR* c = _cursor->get();
-            c->get_key(c, &key);
-            _buffer.assign(static_cast<const char*>(key.data), key.size);
+            if (c->get_key(c, &key) == 0) {
+                _buffer.assign(static_cast<const char*>(key.data), key.size);
+            } else {
+                _buffer.clear();
+            }
         }
         resetCursor();
     }
@@ -321,6 +324,10 @@ private:
         WT_CURSOR* c = _cursor->get();
+        if (searchKey.empty()) {
+            return false;
+        }
+
         const WiredTigerItem searchKeyItem(searchKey);
         c->set_key(c, searchKeyItem.Get());
diff --git a/src/mongo/dbtests/storage_debug_util.cpp b/src/mongo/dbtests/storage_debug_util.cpp
index 9cf0b3c640a..89f5ca48582 100644
--- a/src/mongo/dbtests/storage_debug_util.cpp
+++ b/src/mongo/dbtests/storage_debug_util.cpp
@@ -72,7 +72,7 @@ void printCollectionAndIndexTableEntries(OperationContext* opCtx, const Namespac
         const auto indexDescriptor = indexCatalogEntry->descriptor();
         const auto iam = indexCatalogEntry->accessMethod()->asSortedData();
         if (!iam) {
-            LOGV2(6325100,
+            LOGV2(6325101,
                   "[Debugging] skipping index {index_name} because it isn't SortedData",
                   "index_name"_attr = indexDescriptor->indexName());
             continue;
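The ValidateAdaptor refactor above replaces the single `_indexConsistency` pointer with two owned checkers, `_keyBasedIndexConsistency` and `_columnIndexConsistency`, and fans every call (`traverseRecord`, `traverseIndex`, `haveEntryMismatch`, and so on) out to both. The sketch below shows the counting idea behind such a checker; the bucket scheme, bucket count, and use of `std::hash` are illustrative stand-ins, not the real `KeyStringIndexConsistency` internals.

    #include <cstddef>
    #include <cstdint>
    #include <functional>
    #include <string>
    #include <vector>

    // Illustrative only: detect document/index key mismatches without storing
    // every key by hashing each key into a bucket, incrementing for keys found
    // in documents and decrementing for keys found in the index.
    class CountingChecker {
    public:
        CountingChecker() : _buckets(kNumBuckets, 0) {}

        void addDocKey(const std::string& keyString) {
            _buckets[bucketOf(keyString)] += 1;  // record store side
        }

        void addIndexKey(const std::string& keyString) {
            _buckets[bucketOf(keyString)] -= 1;  // index side
        }

        // After both traversals, any nonzero bucket means some key was seen a
        // different number of times in the collection than in the index.
        bool haveEntryMismatch() const {
            for (int64_t count : _buckets) {
                if (count != 0)
                    return true;
            }
            return false;
        }

    private:
        static constexpr std::size_t kNumBuckets = 1 << 16;

        static std::size_t bucketOf(const std::string& keyString) {
            return std::hash<std::string>{}(keyString) % kNumBuckets;
        }

        std::vector<int64_t> _buckets;
    };

Note how the adaptor aggregates the two checkers in the hunks above: `haveEntryMismatch()` ORs the results (either side may flag a problem), while `limitMemoryUsageForSecondPhase()` ANDs them (the second phase proceeds only if both checkers fit the memory budget).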
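The removed `_validateKeyOrder` helper (whose job presumably now lives behind the key-based checker's `traverseIndex`) enforced two invariants during index traversal: entries serialized as (Key, RecordId) must be strictly increasing, and a unique index must not repeat the key portion across RecordIds. A compilable sketch of those checks, using a simplified stand-in for `KeyString::Value`:

    #include <cstddef>
    #include <cstdint>
    #include <string>
    #include <tuple>
    #include <vector>

    // Stand-in for a decoded index entry; the real code compares serialized
    // KeyString::Value buffers rather than (string, rid) pairs.
    struct Entry {
        std::string key;  // key portion, without the record id
        int64_t rid;      // record id, which makes every entry unique
    };

    // Returns false (and appends messages) if entries are out of order or if a
    // unique index repeats a key under two different record ids.
    bool validateKeyOrder(const std::vector<Entry>& entries,
                          bool unique,
                          std::vector<std::string>* errors) {
        bool valid = true;
        for (std::size_t i = 1; i < entries.size(); ++i) {
            const Entry& prev = entries[i - 1];
            const Entry& curr = entries[i];
            // Entries sort as (Key, RecordId) and record ids are unique, so a
            // well-formed index is strictly increasing.
            if (std::tie(curr.key, curr.rid) <= std::tie(prev.key, prev.rid)) {
                errors->push_back("index is not in strictly ascending or descending order");
                valid = false;
            } else if (unique && curr.key == prev.key) {
                // Strictly increasing overall, but the key portion repeats:
                // a duplicate key in a unique index.
                errors->push_back("unique index has duplicate key: " + curr.key);
                valid = false;
            }
        }
        return valid;
    }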
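validate_state.h also swaps `std::map<std::string, ...>` for `StringMap<...>` when holding index cursors. `StringMap` is MongoDB's hashed string map; a plausible motivation is cheaper lookups by index name, including heterogeneous lookup that avoids building a temporary `std::string`. The standard-library analogue (transparent hashing, which unordered containers support in C++20) looks like this; it is an analogy, not `StringMap`'s implementation:

    #include <cstddef>
    #include <functional>
    #include <string>
    #include <string_view>
    #include <unordered_map>

    // A transparent hasher plus std::equal_to<> enables find() by string_view
    // with no temporary std::string.
    struct TransparentHash {
        using is_transparent = void;
        std::size_t operator()(std::string_view s) const noexcept {
            return std::hash<std::string_view>{}(s);
        }
    };

    // The mapped type is a placeholder for the cursor unique_ptrs in the real map.
    using CursorMap =
        std::unordered_map<std::string, int, TransparentHash, std::equal_to<>>;

    int lookup(const CursorMap& cursors, std::string_view indexName) {
        auto it = cursors.find(indexName);  // no std::string allocated
        return it == cursors.end() ? -1 : it->second;
    }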
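In columns_access_method.cpp, the SERVER-72351 workaround registers `ON_BLOCK_EXIT([&]() { columnKeys->clear(); })` at the top of the per-path diff callback, so the shared key buffer is emptied on every exit path of each invocation and keys from one path's action cannot leak into the next. A minimal scope guard in the same spirit (MongoDB's own ON_BLOCK_EXIT/ScopeGuard utilities are richer than this):

    #include <string>
    #include <utility>
    #include <vector>

    // Minimal RAII scope guard: runs its callback when the scope exits, on
    // every path (normal return, early return, or exception).
    template <typename F>
    class ScopeGuard {
    public:
        explicit ScopeGuard(F f) : _f(std::move(f)) {}
        ~ScopeGuard() { _f(); }
        ScopeGuard(const ScopeGuard&) = delete;
        ScopeGuard& operator=(const ScopeGuard&) = delete;

    private:
        F _f;
    };

    // Hypothetical handler for one diff action, mirroring the fix above: the
    // shared buffer is guaranteed to be empty again for the next action.
    void handleDiffAction(std::vector<std::string>& columnKeys) {
        ScopeGuard guard([&] { columnKeys.clear(); });
        columnKeys.emplace_back("path.to.field");
        // ... consume columnKeys for this one action ...
    }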
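column_store.h gains `FullCellValue`, an owning counterpart to `FullCellView`, with a converting constructor; presumably this lets a cell outlive the storage cursor that produced it. The same borrowed-versus-owned split, shown with standard types purely as an analogy:

    #include <cstdint>
    #include <string>
    #include <string_view>

    // Borrowed form: cheap to pass around, but only valid while the buffer
    // owned by the storage layer (e.g., a positioned cursor) is alive.
    struct CellViewDemo {
        std::string_view path;
        int64_t rid;
        std::string_view value;
    };

    // Owned form: copies the bytes so the cell can be kept after the cursor
    // moves or is destroyed, converting from the view as FullCellValue does.
    struct CellValueDemo {
        std::string path;
        int64_t rid;
        std::string value;

        explicit CellValueDemo(const CellViewDemo& v)
            : path(v.path), rid(v.rid), value(v.value) {}
    };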
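Finally, the wiredtiger_column_store.cpp hunks harden cursor save/restore: the saved-key buffer is filled only when `get_key` returns 0 (success), and a restore that finds an empty saved key bails out instead of seeking with a garbage key. Sketched below against a stand-in cursor type; the real code uses `WT_CURSOR`:

    #include <cstddef>
    #include <string>

    // Stand-in for WT_CURSOR: get_key() returns 0 on success and nonzero when
    // the cursor holds no valid key.
    struct StubCursor {
        const char* data = nullptr;
        std::size_t size = 0;
        bool positioned = false;

        int get_key(const char** out, std::size_t* outSize) const {
            if (!positioned)
                return -1;
            *out = data;
            *outSize = size;
            return 0;
        }
    };

    // Save the position only when the cursor really has one; an empty buffer
    // then tells the restore path that there is nothing to seek back to.
    void saveKey(const StubCursor& cursor, std::string& buffer) {
        const char* data = nullptr;
        std::size_t size = 0;
        if (cursor.get_key(&data, &size) == 0) {
            buffer.assign(data, size);
        } else {
            buffer.clear();
        }
    }

    bool restoreKey(const std::string& buffer) {
        if (buffer.empty())
            return false;  // no saved position to seek to
        // ... set_key(buffer) and search near the saved key ...
        return true;
    }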