author     Mohammad Dashti <mdashti@gmail.com>  2023-01-05 10:51:14 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>  2023-01-05 11:55:01 +0000
commit     72a56c27b43595e898ccdec5d859fe72b0d17135 (patch)
tree       ce5cfe83b2957b3e79deae2ee0706047fc01453c /src/mongo
parent     6908c42231a9e8ac6611aa745cf37be31f954bd5 (diff)
download   mongo-72a56c27b43595e898ccdec5d859fe72b0d17135.tar.gz
SERVER-71061 Added column-store index validation
Co-authored-by: Colin Stolley <colin.stolley@mongodb.com>
Diffstat (limited to 'src/mongo')
-rw-r--r--  src/mongo/db/catalog/README.md | 16
-rw-r--r--  src/mongo/db/catalog/SConscript | 5
-rw-r--r--  src/mongo/db/catalog/collection_validation.cpp | 25
-rw-r--r--  src/mongo/db/catalog/collection_validation_test.cpp | 760
-rw-r--r--  src/mongo/db/catalog/column_index_consistency.cpp | 282
-rw-r--r--  src/mongo/db/catalog/column_index_consistency.h | 201
-rw-r--r--  src/mongo/db/catalog/column_index_consistency_test.cpp | 261
-rw-r--r--  src/mongo/db/catalog/database_impl.cpp | 19
-rw-r--r--  src/mongo/db/catalog/index_consistency.cpp | 511
-rw-r--r--  src/mongo/db/catalog/index_consistency.h | 221
-rw-r--r--  src/mongo/db/catalog/validate_adaptor.cpp | 497
-rw-r--r--  src/mongo/db/catalog/validate_adaptor.h | 67
-rw-r--r--  src/mongo/db/catalog/validate_state.cpp | 80
-rw-r--r--  src/mongo/db/catalog/validate_state.h | 12
-rw-r--r--  src/mongo/db/index/columns_access_method.cpp | 4
-rw-r--r--  src/mongo/db/index/columns_access_method.h | 12
-rw-r--r--  src/mongo/db/storage/column_store.h | 21
-rw-r--r--  src/mongo/db/storage/wiredtiger/wiredtiger_column_store.cpp | 11
-rw-r--r--  src/mongo/dbtests/storage_debug_util.cpp | 2
19 files changed, 2357 insertions, 650 deletions
diff --git a/src/mongo/db/catalog/README.md b/src/mongo/db/catalog/README.md
index 005345a996e..35ccea2bbb8 100644
--- a/src/mongo/db/catalog/README.md
+++ b/src/mongo/db/catalog/README.md
@@ -1399,7 +1399,7 @@ Additionally, users can specify that they'd like to perform a `full` validation.
for the collection and each index, data throttling (for background validation), and general
information about the collection.
+ [IndexConsistency](https://github.com/mongodb/mongo/blob/r4.5.0/src/mongo/db/catalog/index_consistency.h)
- keeps track of the number of keys detected in the record store and indexes. Detects when there
+ descendants keep track of the number of keys detected in the record store and indexes. Detects when there
are index inconsistencies and maintains the information about the inconsistencies for
reporting.
+ [ValidateAdaptor](https://github.com/mongodb/mongo/blob/r4.5.0/src/mongo/db/catalog/validate_adaptor.h)
@@ -1418,15 +1418,15 @@ Additionally, users can specify that they'd like to perform a `full` validation.
+ We choose a read timestamp (`ReadSource`) based on the validation mode: `kNoTimestamp`
for foreground validation and `kCheckpoint` for background validation.
* Traverses the `RecordStore` using the `ValidateAdaptor` object.
- + [Validates each record and adds the document's index key set to the IndexConsistency object](https://github.com/mongodb/mongo/blob/r4.5.0/src/mongo/db/catalog/validate_adaptor.cpp#L61-L140)
+ + [Validates each record and adds the document's index key set to the IndexConsistency objects](https://github.com/mongodb/mongo/blob/r4.5.0/src/mongo/db/catalog/validate_adaptor.cpp#L61-L140)
for consistency checks at later stages.
- + In an effort to reduce the memory footprint of validation, the `IndexConsistency` object
+ + In an effort to reduce the memory footprint of validation, the `IndexConsistency` objects
[hashes](https://github.com/mongodb/mongo/blob/r4.5.0/src/mongo/db/catalog/index_consistency.cpp#L307-L309)
- the keys passed in to one of many buckets.
- + Document keys will
+ the keys (or paths) passed in to one of many buckets.
+ + Document keys (or paths) will
[increment](https://github.com/mongodb/mongo/blob/r4.5.0/src/mongo/db/catalog/index_consistency.cpp#L204-L214)
the respective bucket.
- + Index keys will
+ + Index keys (paths) will
[decrement](https://github.com/mongodb/mongo/blob/r4.5.0/src/mongo/db/catalog/index_consistency.cpp#L239-L248)
the respective bucket.
+ Checks that the `RecordId` is in [increasing order](https://github.com/mongodb/mongo/blob/r4.5.0/src/mongo/db/catalog/validate_adaptor.cpp#L305-L308).
@@ -1434,8 +1434,8 @@ Additionally, users can specify that they'd like to perform a `full` validation.
stored in the `RecordStore` (when performing a foreground validation only).
* Traverses the index entries for each index in the collection.
+ [Validates the index key order to ensure that index entries are in increasing or decreasing order](https://github.com/mongodb/mongo/blob/r4.5.0/src/mongo/db/catalog/validate_adaptor.cpp#L144-L188).
- + Adds the index key to the `IndexConsistency` object for consistency checks at later stages.
-* After the traversals are finished, the `IndexConsistency` object is checked to detect any
+ + Adds the index key to the `IndexConsistency` objects for consistency checks at later stages.
+* After the traversals are finished, the `IndexConsistency` objects are checked to detect any
inconsistencies between the collection and indexes.
+ If a bucket has a `value of 0`, then there are no inconsistencies for the keys that hashed
there.
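
The bucket-counting scheme this README section describes can be reduced to a small standalone sketch. The following is a hedged illustration only (all names hypothetical, not the server's actual classes): document keys increment a hashed bucket, index keys decrement it, and any bucket left non-zero after both traversals signals an inconsistency.

```cpp
// Minimal sketch of the bucket-counting consistency check described above.
// The real implementation (index_consistency.cpp) also tracks bucket sizes,
// memory limits, and a second phase that re-derives the concrete errors.
#include <cstdint>
#include <functional>
#include <string>
#include <vector>

class BucketConsistency {
public:
    explicit BucketConsistency(size_t numBuckets) : _buckets(numBuckets, 0) {}

    // Called once per key generated from a document in the record store.
    void addDocKey(const std::string& key) {
        _buckets[_hash(key)]++;
    }

    // Called once per key found in the index.
    void addIndexKey(const std::string& key) {
        _buckets[_hash(key)]--;
    }

    // A non-zero bucket means some key that hashed there is missing from
    // either the index (positive count) or the record store (negative count).
    bool haveEntryMismatch() const {
        for (int64_t count : _buckets) {
            if (count != 0) {
                return true;
            }
        }
        return false;
    }

private:
    size_t _hash(const std::string& key) const {
        return std::hash<std::string>{}(key) % _buckets.size();
    }

    std::vector<int64_t> _buckets;
};
```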
diff --git a/src/mongo/db/catalog/SConscript b/src/mongo/db/catalog/SConscript
index f392123499e..38a6ed37ffe 100644
--- a/src/mongo/db/catalog/SConscript
+++ b/src/mongo/db/catalog/SConscript
@@ -375,6 +375,7 @@ env.Library(
target='catalog_impl',
source=[
'collection_impl.cpp',
+ 'column_index_consistency.cpp',
'database_holder_impl.cpp',
'database_impl.cpp',
'index_catalog_entry_impl.cpp',
@@ -389,6 +390,7 @@ env.Library(
'$BUILD_DIR/mongo/db/concurrency/lock_manager',
'$BUILD_DIR/mongo/db/curop',
'$BUILD_DIR/mongo/db/dbcommands_idl',
+ '$BUILD_DIR/mongo/db/index/column_store_index',
'$BUILD_DIR/mongo/db/index/index_access_method',
'$BUILD_DIR/mongo/db/index/index_access_method_factory',
'$BUILD_DIR/mongo/db/index/index_access_methods',
@@ -463,6 +465,7 @@ env.Library(
'$BUILD_DIR/mongo/db/concurrency/exception_util',
'$BUILD_DIR/mongo/db/curop',
'$BUILD_DIR/mongo/db/index/index_access_method',
+ '$BUILD_DIR/mongo/db/index/index_access_methods',
'$BUILD_DIR/mongo/db/multi_key_path_tracker',
'$BUILD_DIR/mongo/db/record_id_helpers',
'$BUILD_DIR/mongo/db/server_base',
@@ -663,6 +666,7 @@ if wiredtiger:
'collection_validation_test.cpp',
'collection_writer_test.cpp',
'coll_mod_test.cpp',
+ 'column_index_consistency_test.cpp',
'commit_quorum_options_test.cpp',
'create_collection_test.cpp',
'database_test.cpp',
@@ -682,6 +686,7 @@ if wiredtiger:
'$BUILD_DIR/mongo/db/catalog/collection_crud',
'$BUILD_DIR/mongo/db/commands/test_commands_enabled',
'$BUILD_DIR/mongo/db/index/index_access_method',
+ '$BUILD_DIR/mongo/db/index/index_access_methods',
'$BUILD_DIR/mongo/db/index_builds_coordinator_mongod',
'$BUILD_DIR/mongo/db/multitenancy',
'$BUILD_DIR/mongo/db/op_observer/op_observer',
diff --git a/src/mongo/db/catalog/collection_validation.cpp b/src/mongo/db/catalog/collection_validation.cpp
index 2c6661a4f5c..0f07a819980 100644
--- a/src/mongo/db/catalog/collection_validation.cpp
+++ b/src/mongo/db/catalog/collection_validation.cpp
@@ -174,11 +174,10 @@ void _validateIndexes(OperationContext* opCtx,
*/
void _gatherIndexEntryErrors(OperationContext* opCtx,
ValidateState* validateState,
- IndexConsistency* indexConsistency,
ValidateAdaptor* indexValidator,
ValidateResults* result) {
- indexConsistency->setSecondPhase();
- if (!indexConsistency->limitMemoryUsageForSecondPhase(result)) {
+ indexValidator->setSecondPhase();
+ if (!indexValidator->limitMemoryUsageForSecondPhase(result)) {
return;
}
@@ -223,12 +222,12 @@ void _gatherIndexEntryErrors(OperationContext* opCtx,
}
if (validateState->fixErrors()) {
- indexConsistency->repairMissingIndexEntries(opCtx, result);
+ indexValidator->repairIndexEntries(opCtx, result);
}
LOGV2_OPTIONS(20301, {LogComponent::kIndex}, "Finished traversing through all the indexes");
- indexConsistency->addIndexEntryErrors(result);
+ indexValidator->addIndexEntryErrors(opCtx, result);
}
void _validateIndexKeyCount(OperationContext* opCtx,
@@ -556,8 +555,7 @@ Status validate(OperationContext* opCtx,
logAttrs(validateState.nss()),
logAttrs(validateState.uuid()));
- IndexConsistency indexConsistency(opCtx, &validateState);
- ValidateAdaptor indexValidator(&indexConsistency, &validateState);
+ ValidateAdaptor indexValidator(opCtx, &validateState);
// In traverseRecordStore(), the index validator keeps track of the records in the record
// store so that _validateIndexes() can confirm that the index entries match the records in
@@ -569,10 +567,10 @@ Status validate(OperationContext* opCtx,
// Pause collection validation while a lock is held and between collection and index data
// validation.
//
- // The IndexConsistency object saves document key information during collection data
- // validation and then compares against that key information during index data validation.
- // This fail point is placed in between them, in an attempt to catch any inconsistencies
- // that concurrent CRUD ops might cause if we were to have a bug.
+ // The KeyStringIndexConsistency object saves document key information during collection
+ // data validation and then compares against that key information during index data
+ // validation. This fail point is placed in between them, in an attempt to catch any
+ // inconsistencies that concurrent CRUD ops might cause if we were to have a bug.
//
// Only useful for background validation because we hold an intent lock instead of an
// exclusive lock, and thus allow concurrent operations.
@@ -592,14 +590,13 @@ Status validate(OperationContext* opCtx,
// Validate indexes and check for mismatches.
_validateIndexes(opCtx, &validateState, &indexValidator, results);
- if (indexConsistency.haveEntryMismatch()) {
+ if (indexValidator.haveEntryMismatch()) {
LOGV2_OPTIONS(20305,
{LogComponent::kIndex},
"Index inconsistencies were detected. "
"Starting the second phase of index validation to gather concise errors",
"namespace"_attr = validateState.nss());
- _gatherIndexEntryErrors(
- opCtx, &validateState, &indexConsistency, &indexValidator, results);
+ _gatherIndexEntryErrors(opCtx, &validateState, &indexValidator, results);
}
if (!results->valid) {
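
The net effect of this refactor is that validate() now talks only to the ValidateAdaptor, which owns the per-index-flavor consistency checkers and forwards the mismatch queries and phase transitions. A hedged sketch of that shape, with hypothetical types rather than the actual server classes:

```cpp
// Sketch of the adaptor fan-out introduced by this change (hypothetical
// names): callers query one adaptor, which aggregates over per-index-type
// consistency checkers (e.g. keystring and column-store).
#include <memory>
#include <vector>

struct IndexConsistencyChecker {
    virtual ~IndexConsistencyChecker() = default;
    virtual bool haveEntryMismatch() const = 0;
    virtual void setSecondPhase() = 0;
};

class Adaptor {
public:
    // A mismatch anywhere triggers the second (error-gathering) phase.
    bool haveEntryMismatch() const {
        for (const auto& checker : _checkers) {
            if (checker->haveEntryMismatch()) {
                return true;
            }
        }
        return false;
    }

    void setSecondPhase() {
        for (auto& checker : _checkers) {
            checker->setSecondPhase();
        }
    }

private:
    std::vector<std::unique_ptr<IndexConsistencyChecker>> _checkers;
};
```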
diff --git a/src/mongo/db/catalog/collection_validation_test.cpp b/src/mongo/db/catalog/collection_validation_test.cpp
index 15f8f475aec..19cfe013d13 100644
--- a/src/mongo/db/catalog/collection_validation_test.cpp
+++ b/src/mongo/db/catalog/collection_validation_test.cpp
@@ -27,13 +27,18 @@
* it in the license file.
*/
+#include <string>
+
#include "mongo/bson/util/builder.h"
#include "mongo/db/catalog/catalog_test_fixture.h"
#include "mongo/db/catalog/collection_validation.h"
#include "mongo/db/catalog/collection_write_path.h"
+#include "mongo/db/catalog/column_index_consistency.h"
#include "mongo/db/db_raii.h"
+#include "mongo/db/index/column_key_generator.h"
#include "mongo/db/index/index_access_method.h"
#include "mongo/db/operation_context.h"
+#include "mongo/idl/server_parameter_test_util.h"
#include "mongo/stdx/thread.h"
#include "mongo/unittest/unittest.h"
#include "mongo/util/bufreader.h"
@@ -53,7 +58,7 @@ private:
CatalogTestFixture::setUp();
// Create collection kNss for unit tests to use. It will possess a default _id index.
- CollectionOptions defaultCollectionOptions;
+ const CollectionOptions defaultCollectionOptions;
ASSERT_OK(storageInterface()->createCollection(
operationContext(), kNss, defaultCollectionOptions));
};
@@ -66,12 +71,13 @@ protected:
};
/**
- * Calls validate on collection kNss with both kValidateFull and kValidateNormal validation levels
+ * Calls validate on collection nss with both kValidateFull and kValidateNormal validation levels
* and verifies the results.
*
- * Returns list of validate results.
+ * Returns the list of validation results.
*/
std::vector<std::pair<BSONObj, ValidateResults>> foregroundValidate(
+ const NamespaceString& nss,
OperationContext* opCtx,
bool valid,
int numRecords,
@@ -86,7 +92,7 @@ std::vector<std::pair<BSONObj, ValidateResults>> foregroundValidate(
ValidateResults validateResults;
BSONObjBuilder output;
ASSERT_OK(CollectionValidation::validate(
- opCtx, kNss, mode, repairMode, &validateResults, &output));
+ opCtx, nss, mode, repairMode, &validateResults, &output));
BSONObj obj = output.obj();
BSONObjBuilder validateResultsBuilder;
validateResults.appendToResultObj(&validateResultsBuilder, true /* debugging */);
@@ -127,17 +133,21 @@ ValidateResults omitTransientWarnings(const ValidateResults& results) {
}
/**
- * Calls validate on collection kNss with {background:true} and verifies the results.
+ * Calls validate on collection nss with {background:true} and verifies the results.
* If 'runForegroundAsWell' is set, then foregroundValidate() above will be run in addition.
+ *
+ * Returns the list of validation results.
*/
-void backgroundValidate(OperationContext* opCtx,
- bool valid,
- int numRecords,
- int numInvalidDocuments,
- int numErrors,
- bool runForegroundAsWell) {
+std::vector<std::pair<BSONObj, ValidateResults>> backgroundValidate(const NamespaceString& nss,
+ OperationContext* opCtx,
+ bool valid,
+ int numRecords,
+ int numInvalidDocuments,
+ int numErrors,
+ bool runForegroundAsWell) {
+ std::vector<std::pair<BSONObj, ValidateResults>> res;
if (runForegroundAsWell) {
- foregroundValidate(opCtx, valid, numRecords, numInvalidDocuments, numErrors);
+ res = foregroundValidate(nss, opCtx, valid, numRecords, numInvalidDocuments, numErrors);
}
// This function will force a checkpoint, so background validation can then read from that
@@ -149,7 +159,7 @@ void backgroundValidate(OperationContext* opCtx,
ValidateResults validateResults;
BSONObjBuilder output;
ASSERT_OK(CollectionValidation::validate(opCtx,
- kNss,
+ nss,
CollectionValidation::ValidateMode::kBackground,
CollectionValidation::RepairMode::kNone,
&validateResults,
@@ -161,24 +171,32 @@ void backgroundValidate(OperationContext* opCtx,
ASSERT_EQ(obj.getIntField("nrecords"), numRecords);
ASSERT_EQ(obj.getIntField("nInvalidDocuments"), numInvalidDocuments);
+
+ res.push_back({std::make_pair(obj, validateResults)});
+
+ return res;
}
/**
- * Inserts a range of documents into the kNss collection and then returns that count.
- * The range is defined by [startIDNum, endIDNum), not inclusive of endIDNum, using the numbers as
- * values for '_id' of the document being inserted.
+ * Inserts a range of documents into the nss collection and then returns that count. The range is
+ * defined by [startIDNum, startIDNum+numDocs), not inclusive of (startIDNum+numDocs), using the
+ * numbers as values for '_id'; each inserted document additionally carries 'numFields' generated fields.
*/
-int insertDataRange(OperationContext* opCtx, int startIDNum, int endIDNum) {
- invariant(startIDNum < endIDNum,
- str::stream() << "attempted to insert invalid data range from " << startIDNum
- << " to " << endIDNum);
-
-
- AutoGetCollection coll(opCtx, kNss, MODE_IX);
+int insertDataRangeForNumFields(const NamespaceString& nss,
+ OperationContext* opCtx,
+ const int startIDNum,
+ const int numDocs,
+ const int numFields) {
+ const AutoGetCollection coll(opCtx, nss, MODE_IX);
std::vector<InsertStatement> inserts;
- for (int i = startIDNum; i < endIDNum; ++i) {
- auto doc = BSON("_id" << i);
- inserts.push_back(InsertStatement(doc));
+ for (int i = 0; i < numDocs; ++i) {
+ BSONObjBuilder bsonBuilder;
+ bsonBuilder << "_id" << i + startIDNum;
+ for (int c = 1; c <= numFields; ++c) {
+ bsonBuilder << "a" + std::to_string(c) << i + (i * numFields + startIDNum) + c;
+ }
+ const auto obj = bsonBuilder.obj();
+ inserts.push_back(InsertStatement(obj));
}
{
@@ -187,7 +205,20 @@ int insertDataRange(OperationContext* opCtx, int startIDNum, int endIDNum) {
opCtx, *coll, inserts.begin(), inserts.end(), nullptr, false));
wuow.commit();
}
- return endIDNum - startIDNum;
+ return numDocs;
+}
+
+/**
+ * Inserts a range of documents into the kNss collection and then returns that count. The range is
+ * defined by [startIDNum, endIDNum), not inclusive of endIDNum, using the numbers as values for
+ * '_id' of the document being inserted.
+ */
+int insertDataRange(OperationContext* opCtx, const int startIDNum, const int endIDNum) {
+ invariant(startIDNum < endIDNum,
+ str::stream() << "attempted to insert invalid data range from " << startIDNum
+ << " to " << endIDNum);
+
+ return insertDataRangeForNumFields(kNss, opCtx, startIDNum, endIDNum - startIDNum, 0);
}
/**
@@ -209,17 +240,301 @@ int setUpInvalidData(OperationContext* opCtx) {
return 1;
}
+class CollectionValidationColumnStoreIndexTest : public CollectionValidationDiskTest {
+protected:
+ CollectionValidationColumnStoreIndexTest() : CollectionValidationDiskTest() {}
+
+ const BSONObj kColumnStoreSpec = BSON("name"
+ << "$**_columnstore"
+ << "key"
+ << BSON("$**"
+ << "columnstore")
+ << "v" << 2);
+
+ /**
+ * This method decorates the execution of column-store index validation tests. It works by
+ * 1) creating an 'nss' namespace with a column-store index, 2) inserting documents into this
+ * namespace (via calling 'insertDocsFn'), 3) running validation on this collection and making
+ * sure it's valid, 4) running 'modifyIndexContentsFn', and 5) running 'postCheckFn', which
+ * usually contains an index validation call with relevant assertions.
+ *
+ * Returns the list of validation results.
+ */
+ std::vector<std::pair<BSONObj, ValidateResults>> runColumnStoreIndexTest(
+ const NamespaceString& nss,
+ std::function<int(OperationContext*, repl::StorageInterface*)> insertDocsFn,
+ std::function<void(OperationContext*, ColumnStore*, int)> modifyIndexContentsFn,
+ std::function<std::vector<std::pair<BSONObj, ValidateResults>>(OperationContext*, int)>
+ postCheckFn) {
+
+ RAIIServerParameterControllerForTest controller("featureFlagColumnstoreIndexes", true);
+
+ auto opCtx = operationContext();
+
+ {
+ // Durable catalog expects metadata updates to be timestamped but this is
+ // not necessary in our case - we just want to check the contents of the index table.
+ // The alternative here would be to provide a commit timestamp with a TimestampBlock.
+ repl::UnreplicatedWritesBlock uwb(opCtx);
+
+ ASSERT_OK(
+ storageInterface()->createIndexesOnEmptyCollection(opCtx, nss, {kColumnStoreSpec}));
+ }
+
+ const auto numRecords = insertDocsFn(opCtx, storageInterface());
+
+ // Validate the collection here as a sanity check before we modify the index contents
+ // in-place.
+ foregroundValidate(nss,
+ opCtx,
+ /*valid*/ true,
+ /*numRecords*/ numRecords,
+ /*numInvalidDocuments*/ 0,
+ /*numErrors*/ 0);
+
+ {
+ AutoGetCollection autoColl(opCtx, nss, MODE_IX);
+
+ const auto indexCatalog = autoColl->getIndexCatalog();
+ const auto descriptor = indexCatalog->findIndexByName(opCtx, "$**_columnstore");
+ ASSERT(descriptor) << "Cannot find $**_columnstore in index catalog";
+ const auto entry = indexCatalog->getEntry(descriptor);
+ ASSERT(entry) << "Cannot look up index catalog entry for index $**_columnstore";
+
+ const auto columnStore =
+ dynamic_cast<ColumnStoreAccessMethod*>(entry->accessMethod())->writableStorage();
+ ASSERT_FALSE(columnStore->isEmpty(opCtx))
+ << "index $**_columnstore should not be empty";
+
+ modifyIndexContentsFn(opCtx, columnStore, numRecords);
+ }
+
+ return postCheckFn(opCtx, numRecords);
+ }
+
+ /**
+ * Represents a fault where an index entry is deleted.
+ */
+ struct DeletionFault {};
+
+ /**
+ * Represents a fault where an index entry is additionally inserted.
+ */
+ struct InsertionFault {
+ // This value is inserted in the target index cell.
+ std::string insertedIndexValue;
+
+ InsertionFault(std::string iVal = "WRONG_KEY") : insertedIndexValue(iVal) {}
+ };
+
+ /**
+ * Represents a fault where the value of an index entry is replaced with a wrong value.
+ */
+ struct ReplacementFault {
+ // The actual index value is replaced with this given value in the target index cell.
+ std::string updatedIndexValue;
+
+ ReplacementFault(std::string uVal = "WRONG_KEY") : updatedIndexValue(uVal) {}
+ };
+
+ /**
+ * Represents an index corruption in field represented by 'fieldIndex' for a document with id
+ * equals to 'docIndex'. The actual fault can be one of the 'DeletionFault', 'InsertionFault',
+ * or 'ReplacementFault' options.
+ */
+ struct InjectedCorruption {
+ int fieldIndex;
+ int docIndex;
+ std::variant<DeletionFault, InsertionFault, ReplacementFault> fault;
+
+ InjectedCorruption(int fieldIdx,
+ int docIdx,
+ std::variant<DeletionFault, InsertionFault, ReplacementFault> f)
+ : fieldIndex(fieldIdx), docIndex(docIdx), fault(f) {}
+
+
+ /**
+ * Returns a field index for a field that doesn't exist. As field indexes are assumed to be
+ * non-negative, it's guaranteed that a negative field index does not exist. 'fldId' is used
+ * to produce more specific negative numbers for the non-existing field index to help with
+ * pinpointing the test failures.
+ */
+ static int getNonExistentFieldIndex(const int fldId) {
+ return -fldId;
+ }
+ };
+
+ int docIndexToRowId(const int docIndex) {
+ return docIndex + 1L;
+ }
+
+ /**
+ * This method runs a column-store index test on 'nss' via a call to 'runColumnStoreIndexTest'.
+ * First, it populates an 'nss' collection (with a column-store index defined on it) with
+ * 'numDocs' documents, where each document has 'numFields' fields. Then, applies a series of
+ * 'corruptions' on the column-store index. Finally, the index validation is ran on this
+ * collection (which now has a corrupted column-store index) and the validation results are
+ * returned.
+ *
+ * Note: passing 'doBackgroundValidation = true' performs both foreground and background
+ * validations. However, this can only be done in unit-tests that have a single call to
+ * this method.
+ *
+ * Returns the list of validation results.
+ */
+ std::vector<std::pair<BSONObj, ValidateResults>> validateIndexCorruptions(
+ const NamespaceString& nss,
+ const int numFields,
+ const int numDocs,
+ const std::vector<InjectedCorruption> corruptions,
+ const bool doBackgroundValidation = false) {
+ return runColumnStoreIndexTest(
+ nss,
+ /* insertDocsFn */
+ [&](OperationContext* opCtx, repl::StorageInterface* storageInterface) -> int {
+ return insertDataRangeForNumFields(
+ nss, opCtx, /*startIDNum*/ 1, /*numDocs*/ numDocs, /*numFields*/ numFields);
+ },
+ // modifyIndexContentsFn: For each corruption specified, introduces the corruption and
+ // then in a separate transaction ensures the corruption is now present.
+ [&](OperationContext* opCtx, ColumnStore* columnStore, int numRecords) -> void {
+ const auto getPath = [](int corruptedFldIndex) -> std::string {
+ return "a" + std::to_string(corruptedFldIndex);
+ };
+ const auto seekToCorruptedIndexEntry =
+ [&](int corruptedFldIndex,
+ int corruptedDocIndex) -> boost::optional<FullCellValue> {
+ auto cursor = columnStore->newCursor(opCtx, getPath(corruptedFldIndex));
+ ASSERT(cursor);
+ const auto res =
+ cursor->seekAtOrPast(RowId(docIndexToRowId(corruptedDocIndex)));
+ return res ? FullCellValue(res.value()) : boost::optional<FullCellValue>();
+ };
+
+ for (const auto& corruption : corruptions) {
+ const int corruptedFldIndex = corruption.fieldIndex;
+ const int corruptedDocIndex = corruption.docIndex;
+
+ const auto preCorruptionCell =
+ seekToCorruptedIndexEntry(corruptedFldIndex, corruptedDocIndex);
+ // Apply the requested corruption in a transaction.
+ {
+ WriteUnitOfWork wuow(opCtx);
+ const auto cursor = columnStore->newWriteCursor(opCtx);
+ if (std::holds_alternative<ReplacementFault>(corruption.fault)) {
+ const auto toVal =
+ std::get<ReplacementFault>(corruption.fault).updatedIndexValue;
+ columnStore->update(opCtx,
+ getPath(corruptedFldIndex),
+ preCorruptionCell->rid,
+ StringData(toVal));
+ } else if (std::holds_alternative<DeletionFault>(corruption.fault)) {
+ columnStore->remove(
+ opCtx, getPath(corruptedFldIndex), preCorruptionCell->rid);
+ } else if (std::holds_alternative<InsertionFault>(corruption.fault)) {
+ const auto toVal =
+ std::get<InsertionFault>(corruption.fault).insertedIndexValue;
+ columnStore->insert(opCtx,
+ getPath(corruptedFldIndex),
+ RowId(docIndexToRowId(corruptedDocIndex)),
+ StringData(toVal));
+ } else {
+ MONGO_UNREACHABLE;
+ }
+ wuow.commit();
+ }
+
+ // Confirm the requested corruption is actually applied (in a separate
+ // transaction).
+ {
+
+ if (std::holds_alternative<ReplacementFault>(corruption.fault)) {
+ const auto toVal =
+ std::get<ReplacementFault>(corruption.fault).updatedIndexValue;
+ const auto corruptedCell =
+ seekToCorruptedIndexEntry(corruptedFldIndex, corruptedDocIndex);
+ ASSERT_EQ(corruptedCell->path, getPath(corruptedFldIndex));
+ ASSERT_EQ(corruptedCell->rid, preCorruptionCell->rid);
+ ASSERT_EQ(corruptedCell->value, StringData(toVal));
+ } else if (std::holds_alternative<DeletionFault>(corruption.fault)) {
+ const auto corruptedCell =
+ seekToCorruptedIndexEntry(corruptedFldIndex, corruptedDocIndex);
+ if (numDocs == 1 || corruptedDocIndex == numDocs - 1) {
+ ASSERT_FALSE(corruptedCell);
+ } else {
+ ASSERT_EQ(corruptedCell->path, getPath(corruptedFldIndex));
+ ASSERT_GT(corruptedCell->rid, preCorruptionCell->rid);
+ }
+ } else if (std::holds_alternative<InsertionFault>(corruption.fault)) {
+ const auto toVal =
+ std::get<InsertionFault>(corruption.fault).insertedIndexValue;
+ const auto corruptedCell =
+ seekToCorruptedIndexEntry(corruptedFldIndex, corruptedDocIndex);
+ ASSERT_EQ(corruptedCell->path, getPath(corruptedFldIndex));
+ ASSERT_EQ(corruptedCell->rid,
+ RowId(docIndexToRowId(corruptedDocIndex)));
+ ASSERT_EQ(corruptedCell->value, StringData(toVal));
+ } else {
+ MONGO_UNREACHABLE;
+ }
+ }
+ }
+ },
+ /* postCheckFn */
+ [&](OperationContext* opCtx,
+ int numRecords) -> std::vector<std::pair<BSONObj, ValidateResults>> {
+ auto serviceContext = opCtx->getServiceContext();
+
+ // Confirm there is an expected validation error
+ std::vector<std::pair<BSONObj, ValidateResults>> results;
+
+ if (doBackgroundValidation) {
+ // Background validation must be done in a separate thread due to the
+ // assumptions made in its implementation.
+ stdx::thread runBackgroundValidate =
+ stdx::thread([&serviceContext, &numRecords, &nss, &results] {
+ ThreadClient tc("BackgroundValidate-thread", serviceContext);
+ auto threadOpCtx = tc->makeOperationContext();
+ auto bgResults = backgroundValidate(nss,
+ threadOpCtx.get(),
+ /*valid*/ false,
+ /*numRecords*/ numRecords,
+ /*numInvalidDocuments*/ 0,
+ /*numErrors*/ 1,
+ /*runForegroundAsWell*/ false);
+ results.insert(results.end(), bgResults.begin(), bgResults.end());
+ });
+ // Make sure the background validation finishes successfully.
+ runBackgroundValidate.join();
+ }
+
+ const auto fgResults = foregroundValidate(nss,
+ opCtx,
+ /*valid*/ false,
+ /*numRecords*/ numRecords,
+ /*numInvalidDocuments*/ 0,
+ /*numErrors*/ 1);
+
+ results.insert(results.end(), fgResults.begin(), fgResults.end());
+
+ return results;
+ });
+ }
+};
+
// Verify that calling validate() on an empty collection with different validation levels returns an
// OK status.
TEST_F(CollectionValidationTest, ValidateEmpty) {
- foregroundValidate(operationContext(),
+ foregroundValidate(kNss,
+ operationContext(),
/*valid*/ true,
/*numRecords*/ 0,
/*numInvalidDocuments*/ 0,
/*numErrors*/ 0);
}
TEST_F(CollectionValidationDiskTest, BackgroundValidateEmpty) {
- backgroundValidate(operationContext(),
+ backgroundValidate(kNss,
+ operationContext(),
/*valid*/ true,
/*numRecords*/ 0,
/*numInvalidDocuments*/ 0,
@@ -230,7 +545,8 @@ TEST_F(CollectionValidationDiskTest, BackgroundValidateEmpty) {
// Verify calling validate() on a nonempty collection with different validation levels.
TEST_F(CollectionValidationTest, Validate) {
auto opCtx = operationContext();
- foregroundValidate(opCtx,
+ foregroundValidate(kNss,
+ opCtx,
/*valid*/ true,
/*numRecords*/ insertDataRange(opCtx, 0, 5),
/*numInvalidDocuments*/ 0,
@@ -238,7 +554,8 @@ TEST_F(CollectionValidationTest, Validate) {
}
TEST_F(CollectionValidationDiskTest, BackgroundValidate) {
auto opCtx = operationContext();
- backgroundValidate(opCtx,
+ backgroundValidate(kNss,
+ opCtx,
/*valid*/ true,
/*numRecords*/ insertDataRange(opCtx, 0, 5),
/*numInvalidDocuments*/ 0,
@@ -249,7 +566,8 @@ TEST_F(CollectionValidationDiskTest, BackgroundValidate) {
// Verify calling validate() on a collection with an invalid document.
TEST_F(CollectionValidationTest, ValidateError) {
auto opCtx = operationContext();
- foregroundValidate(opCtx,
+ foregroundValidate(kNss,
+ opCtx,
/*valid*/ false,
/*numRecords*/ setUpInvalidData(opCtx),
/*numInvalidDocuments*/ 1,
@@ -257,7 +575,8 @@ TEST_F(CollectionValidationTest, ValidateError) {
}
TEST_F(CollectionValidationDiskTest, BackgroundValidateError) {
auto opCtx = operationContext();
- backgroundValidate(opCtx,
+ backgroundValidate(kNss,
+ opCtx,
/*valid*/ false,
/*numRecords*/ setUpInvalidData(opCtx),
/*numInvalidDocuments*/ 1,
@@ -268,7 +587,8 @@ TEST_F(CollectionValidationDiskTest, BackgroundValidateError) {
// Verify calling validate() with enforceFastCount=true.
TEST_F(CollectionValidationTest, ValidateEnforceFastCount) {
auto opCtx = operationContext();
- foregroundValidate(opCtx,
+ foregroundValidate(kNss,
+ opCtx,
/*valid*/ true,
/*numRecords*/ insertDataRange(opCtx, 0, 5),
/*numInvalidDocuments*/ 0,
@@ -306,7 +626,7 @@ TEST_F(CollectionValidationDiskTest, BackgroundValidateRunsConcurrentlyWithWrite
ThreadClient tc("BackgroundValidateConcurrentWithCRUD-thread", serviceContext);
auto threadOpCtx = tc->makeOperationContext();
backgroundValidate(
- threadOpCtx.get(), true, numRecords, 0, 0, /*runForegroundAsWell*/ false);
+ kNss, threadOpCtx.get(), true, numRecords, 0, 0, /*runForegroundAsWell*/ false);
});
// Wait until validate starts and hangs mid-way on a failpoint, then do concurrent writes,
@@ -319,7 +639,8 @@ TEST_F(CollectionValidationDiskTest, BackgroundValidateRunsConcurrentlyWithWrite
runBackgroundValidate.join();
// Run regular foreground collection validation to make sure everything is OK.
- foregroundValidate(opCtx,
+ foregroundValidate(kNss,
+ opCtx,
/*valid*/ true,
/*numRecords*/ numRecords + numRecords2,
0,
@@ -376,7 +697,7 @@ TEST_F(CollectionValidationTest, ValidateOldUniqueIndexKeyWarning) {
// Validate the collection here as a sanity check before we modify the index contents in-place.
foregroundValidate(
- opCtx, /*valid*/ true, /*numRecords*/ 1, /*numInvalidDocuments*/ 0, /*numErrors*/ 0);
+ kNss, opCtx, /*valid*/ true, /*numRecords*/ 1, /*numInvalidDocuments*/ 0, /*numErrors*/ 0);
// Update existing entry in index to pre-4.2 format without record id in key string.
{
@@ -429,21 +750,22 @@ TEST_F(CollectionValidationTest, ValidateOldUniqueIndexKeyWarning) {
}
}
- auto results = foregroundValidate(opCtx,
- /*valid*/ true,
- /*numRecords*/ 1,
- /*numInvalidDocuments*/ 0,
- /*numErrors*/ 0);
+ const auto results = foregroundValidate(kNss,
+ opCtx,
+ /*valid*/ true,
+ /*numRecords*/ 1,
+ /*numInvalidDocuments*/ 0,
+ /*numErrors*/ 0);
ASSERT_EQ(results.size(), 2);
for (const auto& result : results) {
const auto& validateResults = result.second;
BSONObjBuilder builder;
- bool debugging = true;
+ const bool debugging = true;
validateResults.appendToResultObj(&builder, debugging);
- auto obj = builder.obj();
+ const auto obj = builder.obj();
ASSERT(validateResults.valid) << obj;
- auto warningsWithoutTransientErrors = omitTransientWarnings(validateResults);
+ const auto warningsWithoutTransientErrors = omitTransientWarnings(validateResults);
ASSERT_EQ(warningsWithoutTransientErrors.warnings.size(), 1U) << obj;
ASSERT_STRING_CONTAINS(warningsWithoutTransientErrors.warnings[0],
"Unique index a_1 has one or more keys in the old format")
@@ -451,5 +773,353 @@ TEST_F(CollectionValidationTest, ValidateOldUniqueIndexKeyWarning) {
}
}
+/**
+ * Checks whether a given 'entry' is equal to any element in the 'list'.
+ */
+bool equalsAny(const std::string& entry, const std::vector<std::string>& list) {
+ return std::any_of(
+ list.begin(), list.end(), [&entry](const std::string& other) { return entry == other; });
+}
+
+// Exhaustively tests having one error in the column-store index by updating one index entry with an
+// invalid value in different parts of the index on collections with different numbers of columns and
+// documents.
+TEST_F(CollectionValidationColumnStoreIndexTest, SingleInvalidIndexEntryCSI) {
+ const int kNumFields = 4;
+ const int kMaxNumDocs = 4;
+
+ int testCaseIdx = 0;
+ for (int numFields = 1; numFields <= kNumFields; ++numFields) {
+ for (int numDocs = 1; numDocs <= kMaxNumDocs; ++numDocs) {
+ for (int corruptedFldIndex = 1; corruptedFldIndex <= numFields; ++corruptedFldIndex) {
+ for (int corruptedDocIndex = 0; corruptedDocIndex < numDocs; ++corruptedDocIndex) {
+ const NamespaceString nss(kNss.toString() + std::to_string(++testCaseIdx));
+
+ // Create collection nss for unit tests to use.
+ const CollectionOptions defaultCollectionOptions;
+ ASSERT_OK(storageInterface()->createCollection(
+ operationContext(), nss, defaultCollectionOptions));
+
+ const auto results = validateIndexCorruptions(
+ nss,
+ numFields,
+ numDocs,
+ /* column-store index corruptions */
+ {{corruptedFldIndex,
+ corruptedDocIndex,
+ /* Update the current index entry with an invalid value. */
+ ReplacementFault("WRONG_" + std::to_string(corruptedFldIndex) + "_" +
+ std::to_string(corruptedDocIndex))}},
+ /* doBackgroundValidation */ true);
+
+ ASSERT_EQ(results.size(), 3);
+
+ for (const auto& result : results) {
+ const auto& validateResults = result.second;
+ BSONObjBuilder builder;
+ const bool debugging = true;
+ validateResults.appendToResultObj(&builder, debugging);
+ const auto obj = builder.obj();
+ ASSERT_FALSE(validateResults.valid) << obj;
+
+ const auto warningsWithoutTransientErrors =
+ omitTransientWarnings(validateResults);
+ ASSERT_EQ(warningsWithoutTransientErrors.warnings.size(), 2U) << obj;
+ ASSERT(equalsAny("Detected 1 missing index entries.",
+ warningsWithoutTransientErrors.warnings))
+ << obj;
+ ASSERT(equalsAny("Detected 1 extra index entries.",
+ warningsWithoutTransientErrors.warnings))
+ << obj;
+
+ ASSERT_EQ(validateResults.errors.size(), 1U) << obj;
+ ASSERT_EQ(validateResults.errors[0],
+ "Index with name '$**_columnstore' has inconsistencies.")
+ << obj;
+
+ const auto& extraEntries = validateResults.extraIndexEntries;
+ ASSERT_EQ(extraEntries.size(), 1U) << obj;
+ ASSERT_EQ(extraEntries[0]["indexName"].String(), "$**_columnstore") << obj;
+ ASSERT_EQ(extraEntries[0]["recordId"].Long(),
+ docIndexToRowId(corruptedDocIndex))
+ << obj;
+ ASSERT_EQ(extraEntries[0]["rowId"].Long(),
+ docIndexToRowId(corruptedDocIndex))
+ << obj;
+ ASSERT_EQ(extraEntries[0]["indexPath"].String(),
+ "a" + std::to_string(corruptedFldIndex))
+ << obj;
+
+ const auto& missingEntries = validateResults.missingIndexEntries;
+ ASSERT_EQ(missingEntries.size(), 1U) << obj;
+ ASSERT_EQ(missingEntries[0]["indexName"].String(), "$**_columnstore")
+ << obj;
+ ASSERT_EQ(missingEntries[0]["recordId"].Long(),
+ docIndexToRowId(corruptedDocIndex))
+ << obj;
+ ASSERT_EQ(missingEntries[0]["rowId"].Long(),
+ docIndexToRowId(corruptedDocIndex))
+ << obj;
+ ASSERT_EQ(missingEntries[0]["indexPath"].String(),
+ "a" + std::to_string(corruptedFldIndex))
+ << obj;
+
+ ASSERT_EQ(validateResults.corruptRecords.size(), 0U) << obj;
+ ASSERT_EQ(validateResults.numRemovedCorruptRecords, 0U) << obj;
+ ASSERT_EQ(validateResults.numRemovedExtraIndexEntries, 0U) << obj;
+ ASSERT_EQ(validateResults.numInsertedMissingIndexEntries, 0U) << obj;
+ ASSERT_EQ(validateResults.numDocumentsMovedToLostAndFound, 0U) << obj;
+ ASSERT_EQ(validateResults.numOutdatedMissingIndexEntry, 0U) << obj;
+ }
+ }
+ }
+ }
+ }
+}
+
+// Exhaustively tests having one error in the column-store index by adding an extra index entry on
+// collections with different numbers of columns and documents.
+TEST_F(CollectionValidationColumnStoreIndexTest, SingleExtraIndexEntry) {
+ const int kNumFields = 4;
+ const int kMaxNumDocs = 4;
+
+ std::vector<std::pair<int, int>> extraIndexEntryCorruptions = {
+ /* non-existent field on an existing document */
+ {/*corruptedFldIndex*/ InjectedCorruption::getNonExistentFieldIndex(1),
+ /*corruptedDocIndex*/ 0},
+ /* non-existent field on a non-existent document */
+ {/*corruptedFldIndex*/ InjectedCorruption::getNonExistentFieldIndex(2),
+ /*corruptedDocIndex*/ kMaxNumDocs * 10}};
+ for (int corruptedFldIndex = 1; corruptedFldIndex <= kNumFields; ++corruptedFldIndex) {
+ /* existing field on a non-existent document */
+ extraIndexEntryCorruptions.push_back(
+ {corruptedFldIndex, /*corruptedDocIndex*/ kMaxNumDocs * 10});
+ }
+
+ int testCaseIdx = 0;
+ for (int numFields = 1; numFields <= kNumFields; ++numFields) {
+ for (int numDocs = 1; numDocs <= kMaxNumDocs; ++numDocs) {
+ for (const auto& corruption : extraIndexEntryCorruptions) {
+ const int corruptedFldIndex = corruption.first;
+ const int corruptedDocIndex = corruption.second;
+
+ const auto nss = NamespaceString(kNss.toString() + std::to_string(++testCaseIdx));
+
+ // Create collection nss for unit tests to use.
+ const CollectionOptions defaultCollectionOptions;
+ ASSERT_OK(storageInterface()->createCollection(
+ operationContext(), nss, defaultCollectionOptions));
+
+ const auto results = validateIndexCorruptions(
+ nss,
+ numFields,
+ numDocs,
+ /* column-store index corruptions */
+ {{corruptedFldIndex,
+ corruptedDocIndex,
+ /* Insert an extra index entry. */
+ InsertionFault("WRONG_" + std::to_string(corruptedFldIndex) + "_" +
+ std::to_string(corruptedDocIndex))}},
+ /* doBackgroundValidation */ true);
+
+ ASSERT_EQ(results.size(), 3);
+
+ for (const auto& result : results) {
+ const auto& validateResults = result.second;
+ BSONObjBuilder builder;
+ const bool debugging = true;
+ validateResults.appendToResultObj(&builder, debugging);
+ const auto obj = builder.obj();
+ ASSERT_FALSE(validateResults.valid) << obj;
+
+ const auto warningsWithoutTransientErrors =
+ omitTransientWarnings(validateResults);
+ ASSERT_EQ(warningsWithoutTransientErrors.warnings.size(), 1U) << obj;
+ ASSERT(fmt::format("Detected {} extra index entries.", 1) ==
+ warningsWithoutTransientErrors.warnings[0])
+ << obj;
+
+ ASSERT_EQ(validateResults.errors.size(), 1U) << obj;
+ ASSERT_EQ(validateResults.errors[0],
+ "Index with name '$**_columnstore' has inconsistencies.")
+ << obj;
+
+ const auto& extraEntries = validateResults.extraIndexEntries;
+ ASSERT_EQ(extraEntries.size(), 1) << obj;
+ ASSERT_EQ(extraEntries[0]["indexName"].String(), "$**_columnstore") << obj;
+ ASSERT_EQ(extraEntries[0]["recordId"].Long(),
+ docIndexToRowId(corruptedDocIndex))
+ << obj;
+ ASSERT_EQ(extraEntries[0]["rowId"].Long(), docIndexToRowId(corruptedDocIndex))
+ << obj;
+ ASSERT_EQ(extraEntries[0]["indexPath"].String(),
+ "a" + std::to_string(corruptedFldIndex))
+ << obj;
+
+ ASSERT_EQ(validateResults.missingIndexEntries.size(), 0U) << obj;
+ ASSERT_EQ(validateResults.corruptRecords.size(), 0U) << obj;
+ ASSERT_EQ(validateResults.numRemovedCorruptRecords, 0U) << obj;
+ ASSERT_EQ(validateResults.numRemovedExtraIndexEntries, 0U) << obj;
+ ASSERT_EQ(validateResults.numInsertedMissingIndexEntries, 0U) << obj;
+ ASSERT_EQ(validateResults.numDocumentsMovedToLostAndFound, 0U) << obj;
+ ASSERT_EQ(validateResults.numOutdatedMissingIndexEntry, 0U) << obj;
+ }
+ }
+ }
+ }
+}
+
+// Exhaustively tests having one error in the column-store index by removing one index entry from
+// different parts of the index on collections with different numbers of columns and documents.
+TEST_F(CollectionValidationColumnStoreIndexTest, SingleMissingIndexEntryCSI) {
+ const int kNumFields = 4;
+ const int kMaxNumDocs = 4;
+
+ int testCaseIdx = 0;
+ for (int numFields = 1; numFields <= kNumFields; ++numFields) {
+ for (int numDocs = 1; numDocs <= kMaxNumDocs; ++numDocs) {
+ for (int corruptedFldIndex = 1; corruptedFldIndex <= numFields; ++corruptedFldIndex) {
+ for (int corruptedDocIndex = 0; corruptedDocIndex < numDocs; ++corruptedDocIndex) {
+ const NamespaceString nss(kNss.toString() + std::to_string(++testCaseIdx));
+
+ // Create collection nss for unit tests to use.
+ const CollectionOptions defaultCollectionOptions;
+ ASSERT_OK(storageInterface()->createCollection(
+ operationContext(), nss, defaultCollectionOptions));
+
+ const auto results =
+ validateIndexCorruptions(nss,
+ numFields,
+ numDocs,
+ /* column-store index corruptions */
+ {{corruptedFldIndex,
+ corruptedDocIndex,
+ /* Remove the existing index entry. */
+ DeletionFault()}},
+ /* doBackgroundValidation */ true);
+
+ ASSERT_EQ(results.size(), 3);
+
+ for (const auto& result : results) {
+ const auto& validateResults = result.second;
+ BSONObjBuilder builder;
+ const bool debugging = true;
+ validateResults.appendToResultObj(&builder, debugging);
+ const auto obj = builder.obj();
+ ASSERT_FALSE(validateResults.valid) << obj;
+
+ const auto warningsWithoutTransientErrors =
+ omitTransientWarnings(validateResults);
+ ASSERT_EQ(warningsWithoutTransientErrors.warnings.size(), 1U) << obj;
+ ASSERT("Detected 1 missing index entries." ==
+ warningsWithoutTransientErrors.warnings[0])
+ << obj;
+
+ ASSERT_EQ(validateResults.errors.size(), 1U) << obj;
+ ASSERT_EQ(validateResults.errors[0],
+ "Index with name '$**_columnstore' has inconsistencies.")
+ << obj;
+
+ const auto& missingEntries = validateResults.missingIndexEntries;
+ ASSERT_EQ(missingEntries.size(), 1U) << obj;
+ ASSERT_EQ(missingEntries[0]["indexName"].String(), "$**_columnstore")
+ << obj;
+ ASSERT_EQ(missingEntries[0]["recordId"].Long(),
+ docIndexToRowId(corruptedDocIndex))
+ << obj;
+ ASSERT_EQ(missingEntries[0]["rowId"].Long(),
+ docIndexToRowId(corruptedDocIndex))
+ << obj;
+ ASSERT_EQ(missingEntries[0]["indexPath"].String(),
+ "a" + std::to_string(corruptedFldIndex))
+ << obj;
+
+ ASSERT_EQ(validateResults.extraIndexEntries.size(), 0U) << obj;
+ ASSERT_EQ(validateResults.corruptRecords.size(), 0U) << obj;
+ ASSERT_EQ(validateResults.numRemovedCorruptRecords, 0U) << obj;
+ ASSERT_EQ(validateResults.numRemovedExtraIndexEntries, 0U) << obj;
+ ASSERT_EQ(validateResults.numInsertedMissingIndexEntries, 0U) << obj;
+ ASSERT_EQ(validateResults.numDocumentsMovedToLostAndFound, 0U) << obj;
+ ASSERT_EQ(validateResults.numOutdatedMissingIndexEntry, 0U) << obj;
+ }
+ }
+ }
+ }
+ }
+}
+
+// Tests having multiple errors in the column-store index by updating several index entries with an
+// invalid value in different parts of the index on a collection with multiple columns and multiple
+// documents.
+TEST_F(CollectionValidationColumnStoreIndexTest, MultipleInvalidIndexEntryCSI) {
+ const int numFields = 10;
+ const int numDocs = 50;
+
+ auto results = validateIndexCorruptions(
+ kNss,
+ numFields,
+ numDocs,
+ /* column-store index corruptions */
+ {{/* corruptedFldIndex */ 5,
+ /* corruptedDocIndex */ 9,
+ /* Remove the existing index entry. */
+ DeletionFault()},
+ {/* corruptedFldIndex */ 7,
+ /* corruptedDocIndex */ 2,
+ /* Update the current index entry with an invalid value. */
+ ReplacementFault()},
+ {/* corruptedFldIndex */ 9,
+ /* corruptedDocIndex */ 500,
+ /* Insert an extra index entry for a non-existing document. */
+ InsertionFault()},
+ {/* corruptedFldIndex */ InjectedCorruption::getNonExistentFieldIndex(1),
+ /* corruptedDocIndex */ 5,
+ /* Insert an extra index entry for a non-existing field of an existing document. */
+ InsertionFault()},
+ {/* corruptedFldIndex */ InjectedCorruption::getNonExistentFieldIndex(2),
+ /* corruptedDocIndex */ 600,
+ /* Insert an extra index entry for a non-existing field of a non-existing document. */
+ InsertionFault()},
+ {/* corruptedFldIndex */ 2,
+ /* corruptedDocIndex */ 33,
+ /* Update the current index entry with an invalid value. */
+ ReplacementFault()}},
+ /* doBackgroundValidation */ true);
+
+ ASSERT_EQ(results.size(), 3);
+
+ for (const auto& result : results) {
+ const auto& validateResults = result.second;
+ BSONObjBuilder builder;
+ const bool debugging = true;
+ validateResults.appendToResultObj(&builder, debugging);
+ const auto obj = builder.obj();
+ ASSERT_FALSE(validateResults.valid) << obj;
+
+ const auto warningsWithoutTransientErrors = omitTransientWarnings(validateResults);
+ ASSERT_EQ(warningsWithoutTransientErrors.warnings.size(), 2U) << obj;
+ ASSERT(
+ equalsAny("Detected 3 missing index entries.", warningsWithoutTransientErrors.warnings))
+ << obj;
+ ASSERT(
+ equalsAny("Detected 5 extra index entries.", warningsWithoutTransientErrors.warnings))
+ << obj;
+
+ ASSERT_EQ(validateResults.errors.size(), 1U) << obj;
+ ASSERT_EQ(validateResults.errors[0],
+ "Index with name '$**_columnstore' has inconsistencies.")
+ << obj;
+
+ ASSERT_EQ(validateResults.missingIndexEntries.size(), 3U) << obj;
+ ASSERT_EQ(validateResults.extraIndexEntries.size(), 5U) << obj;
+ ASSERT_EQ(validateResults.corruptRecords.size(), 0U) << obj;
+ ASSERT_EQ(validateResults.numRemovedCorruptRecords, 0U) << obj;
+ ASSERT_EQ(validateResults.numRemovedExtraIndexEntries, 0U) << obj;
+ ASSERT_EQ(validateResults.numInsertedMissingIndexEntries, 0U) << obj;
+ ASSERT_EQ(validateResults.numDocumentsMovedToLostAndFound, 0U) << obj;
+ ASSERT_EQ(validateResults.numOutdatedMissingIndexEntry, 0U) << obj;
+ }
+}
+
} // namespace
} // namespace mongo
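
The corruption taxonomy in the tests above is driven by a std::variant of fault types, dispatched with std::holds_alternative/std::get. Stripped of the column-store plumbing, the pattern reduces to the following self-contained sketch (hypothetical, for illustration only):

```cpp
#include <iostream>
#include <string>
#include <variant>

struct DeletionFault {};
struct InsertionFault {
    std::string value = "WRONG_KEY";
};
struct ReplacementFault {
    std::string value = "WRONG_KEY";
};

using Fault = std::variant<DeletionFault, InsertionFault, ReplacementFault>;

// Dispatch on the active alternative, mirroring how the tests choose
// between remove/insert/update operations on the column store.
void applyFault(const Fault& fault) {
    if (std::holds_alternative<DeletionFault>(fault)) {
        std::cout << "delete the index entry\n";
    } else if (std::holds_alternative<InsertionFault>(fault)) {
        std::cout << "insert extra entry: " << std::get<InsertionFault>(fault).value << '\n';
    } else {
        std::cout << "replace entry with: " << std::get<ReplacementFault>(fault).value << '\n';
    }
}

int main() {
    applyFault(DeletionFault{});
    applyFault(InsertionFault{});
    applyFault(ReplacementFault{"WRONG_5_9"});
}
```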
diff --git a/src/mongo/db/catalog/column_index_consistency.cpp b/src/mongo/db/catalog/column_index_consistency.cpp
new file mode 100644
index 00000000000..dfa6d65b7e6
--- /dev/null
+++ b/src/mongo/db/catalog/column_index_consistency.cpp
@@ -0,0 +1,282 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/catalog/column_index_consistency.h"
+#include "mongo/db/concurrency/exception_util.h"
+
+namespace mongo {
+
+int64_t ColumnIndexConsistency::traverseIndex(OperationContext* opCtx,
+ const IndexCatalogEntry* index,
+ ProgressMeterHolder& _progress,
+ ValidateResults* results) {
+ const auto indexName = index->descriptor()->indexName();
+
+ // Ensure that this index has an open index cursor.
+ const auto csiCursorIt = _validateState->getColumnStoreCursors().find(indexName);
+ invariant(csiCursorIt != _validateState->getColumnStoreCursors().end());
+
+ const std::unique_ptr<ColumnStore::Cursor>& csiCursor = csiCursorIt->second;
+
+ int64_t numIndexEntries = 0;
+ // Traverse the index in this for loop.
+ for (auto cell = csiCursor->seekAtOrPast("", ColumnStore::kNullRowId); cell;
+ cell = csiCursor->next()) {
+ {
+ stdx::unique_lock<Client> lk(*opCtx->getClient());
+ _progress.get(lk)->hit();
+ }
+ ++numIndexEntries;
+
+ if (numIndexEntries % kInterruptIntervalNumRecords == 0) {
+ // Periodically checks for interrupts and yields.
+ opCtx->checkForInterrupt();
+ _validateState->yield(opCtx);
+ }
+
+ if (_firstPhase) {
+ addIndexEntry(cell.get());
+ } else {
+ _updateSuspectList(cell.get(), results);
+ }
+ }
+
+ return numIndexEntries;
+}
+
+void ColumnIndexConsistency::traverseRecord(OperationContext* opCtx,
+ const CollectionPtr& coll,
+ const IndexCatalogEntry* index,
+ const RecordId& recordId,
+ const BSONObj& record,
+ ValidateResults* results) {
+ ColumnStoreAccessMethod* csam = dynamic_cast<ColumnStoreAccessMethod*>(index->accessMethod());
+
+ // Shred the record.
+ csam->getKeyGen().visitCellsForInsert(
+ record, [&](PathView path, const column_keygen::UnencodedCellView& unencodedCell) {
+ _cellBuilder.reset();
+ column_keygen::writeEncodedCell(unencodedCell, &_cellBuilder);
+ tassert(
+ 7106112, "RecordID cannot be a string for column store indexes", !recordId.isStr());
+ const auto cell = FullCellView{
+ path, recordId.getLong(), CellView(_cellBuilder.buf(), _cellBuilder.len())};
+ if (_firstPhase) {
+ addDocEntry(cell);
+ } else {
+ _updateSuspectList(cell, results);
+ }
+ });
+}
+
+void ColumnIndexConsistency::_addIndexEntryErrors(OperationContext* opCtx,
+ const IndexCatalogEntry* index,
+ ValidateResults* results) {
+
+ if (_firstPhase) {
+ return;
+ }
+
+ const auto indexName = index->descriptor()->indexName();
+
+ // Ensure that this index has an open index cursor.
+ const auto csiCursorIt = _validateState->getColumnStoreCursors().find(indexName);
+ invariant(csiCursorIt != _validateState->getColumnStoreCursors().end());
+
+ const auto& csiCursor = csiCursorIt->second;
+
+ const ColumnStoreAccessMethod* csam =
+ dynamic_cast<ColumnStoreAccessMethod*>(index->accessMethod());
+
+ for (const auto rowId : _suspects) {
+ // Gather all paths for this RecordId.
+ StringMap<int> paths;
+ PathValue nextPath = "";
+ while (auto next = csiCursor->seekAtOrPast(nextPath, rowId)) {
+ if (next->rid < rowId) {
+ nextPath.assign(next->path.rawData(), next->path.size());
+ continue;
+ }
+
+ if (next->rid == rowId) {
+ paths[next->path]++;
+ }
+ nextPath.assign(next->path.rawData(), next->path.size());
+ nextPath += '\x01'; // Next possible path (\0 is not allowed).
+ }
+
+ const RecordId recordId(rowId);
+ const auto& rowStoreCursor = _validateState->getTraverseRecordStoreCursor();
+ if (const auto record = rowStoreCursor->seekExact(opCtx, recordId); record) {
+ // Shred the document and check each path/value against csi(path, rowId).
+ csam->getKeyGen().visitCellsForInsert(
+ record->data.toBson(),
+ [&](PathView path, const column_keygen::UnencodedCellView& cell) {
+ _cellBuilder.reset();
+ column_keygen::writeEncodedCell(cell, &_cellBuilder);
+ const auto rowCell =
+ FullCellView{path, rowId, CellView(_cellBuilder.buf(), _cellBuilder.len())};
+
+ const auto csiCell = csiCursor->seekExact(path, rowId);
+ if (!csiCell) {
+ // The row store has an entry that the index doesn't.
+ _missingIndexEntries.emplace_back(rowCell);
+ results->missingIndexEntries.push_back(
+ _generateInfo(csam->indexName(), recordId, path, rowId));
+ } else if (csiCell->value != rowCell.value) {
+ // The row store and index values diverge.
+ _extraIndexEntries.emplace_back(csiCell.get());
+ _missingIndexEntries.emplace_back(rowCell);
+ results->missingIndexEntries.push_back(_generateInfo(
+ csam->indexName(), recordId, path, rowId, csiCell->value));
+ results->extraIndexEntries.push_back(
+ _generateInfo(csam->indexName(), recordId, path, rowId, rowCell.value));
+ }
+ if (paths.count(rowCell.path) == 1) {
+ paths.erase(rowCell.path);
+ } else {
+ paths[rowCell.path]--;
+ }
+ });
+ }
+
+ // Extra paths in index that don't exist in the row-store.
+ for (const auto& kv : paths) {
+ for (int i = 0; i < kv.second; i++) {
+ _extraIndexEntries.emplace_back(kv.first, rowId, ""_sd);
+ results->extraIndexEntries.push_back(
+ _generateInfo(csam->indexName(), recordId, kv.first, rowId));
+ }
+ }
+ }
+
+ if (!_missingIndexEntries.empty() || !_extraIndexEntries.empty()) {
+ StringBuilder ss;
+ ss << "Index with name '" << csam->indexName() << "' has inconsistencies.";
+ results->errors.push_back(ss.str());
+ results->indexResultsMap.at(csam->indexName()).valid = false;
+ }
+ if (!_missingIndexEntries.empty()) {
+ StringBuilder ss;
+ ss << "Detected " << _missingIndexEntries.size() << " missing index entries.";
+ results->warnings.push_back(ss.str());
+ results->valid = false;
+ }
+ if (!_extraIndexEntries.empty()) {
+ StringBuilder ss;
+ ss << "Detected " << _extraIndexEntries.size() << " extra index entries.";
+ results->warnings.push_back(ss.str());
+ results->valid = false;
+ }
+}
+
+void ColumnIndexConsistency::addIndexEntryErrors(OperationContext* opCtx,
+ ValidateResults* results) {
+ int numColumnStoreIndexes = 0;
+ for (const auto& index : _validateState->getIndexes()) {
+ const IndexDescriptor* descriptor = index->descriptor();
+ if (descriptor->getAccessMethodName() == IndexNames::COLUMN) {
+ ++numColumnStoreIndexes;
+ uassert(7106138,
+ "The current implementation only supports a single column-store index.",
+ numColumnStoreIndexes <= 1);
+ _addIndexEntryErrors(opCtx, index.get(), results);
+ }
+ }
+}
+
+void ColumnIndexConsistency::repairIndexEntries(OperationContext* opCtx, ValidateResults* results) {
+ // TODO SERVER-71560 Repair should be implemented.
+}
+
+void ColumnIndexConsistency::validateIndexKeyCount(OperationContext* opCtx,
+ const IndexCatalogEntry* index,
+ long long* numRecords,
+ IndexValidateResults& results) {
+ // Nothing to do.
+}
+
+// Generate info for the validate() command output.
+BSONObj ColumnIndexConsistency::_generateInfo(const std::string& indexName,
+ const RecordId& recordId,
+ const PathView path,
+ RowId rowId,
+ StringData value) {
+ BSONObjBuilder infoBuilder;
+
+ infoBuilder.append("indexName", indexName);
+ recordId.serializeToken("recordId", &infoBuilder);
+ if (rowId != ColumnStore::kNullRowId) {
+ infoBuilder.append("rowId", rowId);
+ }
+ if (!path.empty()) {
+ infoBuilder.append("indexPath", path);
+ }
+ if (!value.empty()) {
+ infoBuilder.append("value", value);
+ }
+ return infoBuilder.obj();
+}
+
+void ColumnIndexConsistency::addDocEntry(const FullCellView& val) {
+ _tabulateEntry(val, 1);
+ _numDocs++;
+}
+
+void ColumnIndexConsistency::addIndexEntry(const FullCellView& val) {
+ _tabulateEntry(val, -1);
+ _numIndexEntries++;
+}
+
+void ColumnIndexConsistency::_updateSuspectList(const FullCellView& cell,
+ ValidateResults* results) {
+ const auto rawHash = _hash(cell);
+ const auto hashLower = rawHash % _indexKeyBuckets.size();
+ const auto hashUpper = (rawHash / _indexKeyBuckets.size()) % _indexKeyBuckets.size();
+
+ if (_indexKeyBuckets[hashLower].indexKeyCount != 0 ||
+ _indexKeyBuckets[hashUpper].indexKeyCount != 0) {
+ _suspects.insert(cell.rid);
+ }
+}
+
+void ColumnIndexConsistency::_tabulateEntry(const FullCellView& cell, int step) {
+ const auto rawHash = _hash(cell);
+ const auto hashLower = rawHash % _indexKeyBuckets.size();
+ const auto hashUpper = (rawHash / _indexKeyBuckets.size()) % _indexKeyBuckets.size();
+ auto& lower = _indexKeyBuckets[hashLower];
+ auto& upper = _indexKeyBuckets[hashUpper];
+ const auto cellSz = cell.path.size() + sizeof(cell.rid) + cell.value.size();
+
+ lower.indexKeyCount += step;
+ lower.bucketSizeBytes += cellSz;
+ upper.indexKeyCount += step;
+ upper.bucketSizeBytes += cellSz;
+}
+} // namespace mongo
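
Note that _tabulateEntry and _updateSuspectList above derive two bucket positions from a single hash and treat a row as suspect only if either bucket is non-zero: a counting-Bloom-filter-style test that admits false positives (harmless extra re-checking in phase two) but no false negatives. A standalone sketch of that mechanic, with hypothetical names:

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

class TwoBucketFilter {
public:
    explicit TwoBucketFilter(size_t numBuckets) : _buckets(numBuckets, 0) {}

    // step = +1 for document cells, -1 for index cells (phase one).
    void tabulate(uint64_t rawHash, int64_t step) {
        _buckets[_lower(rawHash)] += step;
        _buckets[_upper(rawHash)] += step;
    }

    // Phase two: a cell is only worth re-checking if either of its
    // buckets still carries an imbalance.
    bool isSuspect(uint64_t rawHash) const {
        return _buckets[_lower(rawHash)] != 0 || _buckets[_upper(rawHash)] != 0;
    }

private:
    size_t _lower(uint64_t h) const {
        return h % _buckets.size();
    }
    size_t _upper(uint64_t h) const {
        return (h / _buckets.size()) % _buckets.size();
    }

    std::vector<int64_t> _buckets;
};
```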
diff --git a/src/mongo/db/catalog/column_index_consistency.h b/src/mongo/db/catalog/column_index_consistency.h
new file mode 100644
index 00000000000..a503e8c7154
--- /dev/null
+++ b/src/mongo/db/catalog/column_index_consistency.h
@@ -0,0 +1,201 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include <absl/hash/hash.h>
+#include <algorithm>
+#include <vector>
+
+#include "mongo/db/catalog/index_consistency.h"
+#include "mongo/db/catalog/throttle_cursor.h"
+#include "mongo/db/catalog/validate_state.h"
+#include "mongo/db/index/columns_access_method.h"
+#include "mongo/db/storage/column_store.h"
+#include "mongo/stdx/unordered_set.h"
+#include "mongo/util/progress_meter.h"
+
+namespace mongo {
+
+/**
+ * The ColumnIndexConsistency class is used to keep track of the index consistency in column-store
+ * indexes. It does this by using the index keys from index entries and index keys generated from
+ * the document to ensure there is a one-to-one mapping for each key.
+ * In addition, an IndexObserver class can be hooked into the IndexAccessMethod to inform this class
+ * about changes to the indexes during a validation and compensate for them.
+ */
+class ColumnIndexConsistency final : protected IndexConsistency {
+
+public:
+ ColumnIndexConsistency(OperationContext* opCtx,
+ CollectionValidation::ValidateState* validateState,
+ const size_t numHashBuckets = kNumHashBuckets)
+ : IndexConsistency(opCtx, validateState, numHashBuckets) {}
+
+ void setSecondPhase() {
+ IndexConsistency::setSecondPhase();
+ }
+
+ /**
+ * Traverses the column-store index via 'cursor' and accumulates the traversal results.
+ */
+ int64_t traverseIndex(OperationContext* opCtx,
+ const IndexCatalogEntry* index,
+ ProgressMeterHolder& _progress,
+ ValidateResults* results);
+
+ /**
+ * Traverses all paths in a single record from the row-store via the given {'recordId','record'}
+ * pair and accumulates the traversal results.
+ */
+ void traverseRecord(OperationContext* opCtx,
+ const CollectionPtr& coll,
+ const IndexCatalogEntry* index,
+ const RecordId& recordId,
+ const BSONObj& recordBson,
+ ValidateResults* results);
+
+ /**
+     * Returns true if any bucket in `_indexKeyBuckets` has a non-zero count, and false
+     * otherwise.
+ */
+ bool haveEntryMismatch() const {
+ return std::any_of(_indexKeyBuckets.cbegin(), _indexKeyBuckets.cend(), [](auto b) {
+ return b.indexKeyCount != 0;
+ });
+ }
+
+ /**
+ * If repair mode enabled, try inserting _missingIndexEntries into indexes.
+ */
+ void repairIndexEntries(OperationContext* opCtx, ValidateResults* results);
+
+ /**
+ * Records the errors gathered from the second phase of index validation into the provided
+ * ValidateResultsMap and ValidateResults.
+ */
+ void addIndexEntryErrors(OperationContext* opCtx, ValidateResults* results);
+
+ /**
+ * Sets up this IndexConsistency object to limit memory usage in the second phase of index
+ * validation. Returns whether the memory limit is sufficient to report at least one index entry
+ * inconsistency and continue with the second phase of validation.
+ */
+ bool limitMemoryUsageForSecondPhase(ValidateResults* result) {
+ return true;
+ }
+
+ void validateIndexKeyCount(OperationContext* opCtx,
+ const IndexCatalogEntry* index,
+ long long* numRecords,
+ IndexValidateResults& results);
+
+ uint64_t getTotalIndexKeys() {
+ return _numIndexEntries;
+ }
+
+ //////////////////////////////////////////////////////////
+ // Beginning of methods being public for testing purposes
+ //////////////////////////////////////////////////////////
+ int64_t getNumDocs() const {
+ return _numDocs;
+ }
+
+ uint32_t getBucketCount(size_t bucketNum) const {
+ invariant(bucketNum < _indexKeyBuckets.size());
+ return _indexKeyBuckets[bucketNum].indexKeyCount;
+ }
+
+ uint32_t getBucketSizeBytes(size_t bucketNum) const {
+ invariant(bucketNum < _indexKeyBuckets.size());
+ return _indexKeyBuckets[bucketNum].bucketSizeBytes;
+ }
+
+ /**
+ * Accumulates the info about a cell extracted from a shredded row-store record.
+ */
+ void addDocEntry(const FullCellView& val);
+
+ /**
+ * Accumulates the info about a column-store index entry cell.
+ */
+ void addIndexEntry(const FullCellView& val);
+
+ static uint32_t _hash(const FullCellView& cell, uint64_t seed = 0) {
+ MONGO_STATIC_ASSERT_MSG(std::is_same<decltype(cell.rid), int64_t>::value,
+ "'rid' should be an int64 (i.e., it's not a class) so that we're "
+ "not hashing based on the compiler chosen layout for a struct.");
+
+ MONGO_STATIC_ASSERT_MSG(sizeof(uint64_t) <= sizeof(size_t),
+ "Unable to safely store a uint64_t value in a size_t variable");
+
+ size_t hash = static_cast<size_t>(seed);
+ boost::hash_combine(hash,
+ absl::hash_internal::CityHash64(cell.path.rawData(), cell.path.size()));
+ boost::hash_combine(hash,
+ absl::hash_internal::CityHash64(
+ reinterpret_cast<const char*>(&cell.rid), sizeof(cell.rid)));
+ boost::hash_combine(
+ hash, absl::hash_internal::CityHash64(cell.value.rawData(), cell.value.size()));
+ return hash;
+ }
+ //////////////////////////////////////////////////////////
+ // End of methods being public for testing purposes
+ //////////////////////////////////////////////////////////
+
+private:
+ ColumnIndexConsistency() = delete;
+
+ /**
+ * Pinpoints the errors from the accumulated information from traversal of both row-store and
+ * column-store index and adds these errors to 'results'.
+ */
+ void _addIndexEntryErrors(OperationContext* opCtx,
+ const IndexCatalogEntry* index,
+ ValidateResults* results);
+
+ void _tabulateEntry(const FullCellView& cell, int step);
+
+ BSONObj _generateInfo(const std::string& indexName,
+ const RecordId& recordId,
+ PathView path,
+ RowId rowId,
+ StringData value = "");
+
+ void _updateSuspectList(const FullCellView& cell, ValidateResults* results);
+
+ std::vector<FullCellValue> _missingIndexEntries;
+ std::vector<FullCellValue> _extraIndexEntries;
+
+ stdx::unordered_set<RowId> _suspects;
+ int64_t _numDocs = 0;
+ int64_t _numIndexEntries = 0;
+ BufBuilder _cellBuilder;
+};
+} // namespace mongo
diff --git a/src/mongo/db/catalog/column_index_consistency_test.cpp b/src/mongo/db/catalog/column_index_consistency_test.cpp
new file mode 100644
index 00000000000..0199b9627e1
--- /dev/null
+++ b/src/mongo/db/catalog/column_index_consistency_test.cpp
@@ -0,0 +1,261 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/catalog/catalog_test_fixture.h"
+#include "mongo/db/catalog/column_index_consistency.h"
+#include "mongo/idl/server_parameter_test_util.h"
+#include "mongo/stdx/unordered_set.h"
+#include "mongo/unittest/unittest.h"
+
+namespace mongo {
+
+namespace {
+
+const NamespaceString kNss = NamespaceString("test.t");
+
+class ColumnIndexConsistencyTest : public CatalogTestFixture {
+protected:
+ ColumnIndexConsistencyTest(Options options = {}) : CatalogTestFixture(std::move(options)) {}
+
+ ColumnIndexConsistency createColumnStoreConsistency(
+ const size_t numHashBuckets = IndexConsistency::kNumHashBuckets) {
+ return ColumnIndexConsistency(operationContext(), _validateState.get(), numHashBuckets);
+ }
+
+ void setUp() override {
+ CatalogTestFixture::setUp();
+
+ auto opCtx = operationContext();
+
+ // Create collection kNss for unit tests to use. It will possess a default _id index.
+ const CollectionOptions defaultCollectionOptions;
+ ASSERT_OK(storageInterface()->createCollection(opCtx, kNss, defaultCollectionOptions));
+
+ _validateState = std::make_unique<CollectionValidation::ValidateState>(
+ opCtx,
+ kNss,
+ CollectionValidation::ValidateMode::kForeground,
+ CollectionValidation::RepairMode::kNone);
+    }
+
+ void tearDown() override {
+ _validateState.reset();
+ CatalogTestFixture::tearDown();
+ }
+
+ std::unique_ptr<CollectionValidation::ValidateState> _validateState;
+};
+
+const std::vector<FullCellView> kEntries{
+ {"path1"_sd, 1, "value1"_sd},
+ {"path1"_sd, 2, "value1"_sd},
+ {"path1"_sd, 3, "value1"_sd},
+
+ {"path1"_sd, 4, "value1"_sd},
+ {"path2"_sd, 4, "value1"_sd},
+ {"path3"_sd, 4, "value1"_sd},
+
+ {"path4"_sd, 5, "value1"_sd},
+ {"path4"_sd, 5, "value2"_sd},
+ {"path4"_sd, 5, "value3"_sd},
+
+ {"path4"_sd, 0x1000'0000'0000'0001, "value3"_sd},
+ {"path4"_sd, 0x2000'0000'0000'0001, "value3"_sd},
+ {"path4"_sd, 0x3000'0000'0000'0001, "value3"_sd},
+
+ {"path44"_sd, 5, "value3"_sd},
+ {"path4"_sd, 5, "value33"_sd},
+
+ {"path1"_sd, 1, "1value1"_sd},
+ {"path1"_sd, 2, "1value1"_sd},
+ {"path1"_sd, 3, "1value1"_sd},
+
+ {"path1"_sd, 4, "1value1"_sd},
+ {"path2"_sd, 4, "1value1"_sd},
+ {"path3"_sd, 4, "1value1"_sd},
+
+ {"path4"_sd, 5, "1value1"_sd},
+ {"path4"_sd, 5, "1value2"_sd},
+ {"path4"_sd, 5, "1value3"_sd},
+
+ {"path4"_sd, 0x1000'0000'0000'0001, "value32"_sd},
+ {"path4"_sd, 0x2000'0000'0000'0001, "value33"_sd},
+ {"path4"_sd, 0x3000'0000'0000'0001, "value34"_sd},
+
+ {"path44"_sd, 5, "4value3"_sd},
+ {"path4"_sd, 5, "4value33"_sd},
+
+ // A couple duplicates
+ {"path44"_sd, 5, "4value3"_sd},
+ {"path4"_sd, 5, "4value33"_sd},
+
+ {"path.some.path"_sd, 5, "a dog likes to chew"_sd},
+ {"path.some.path"_sd, 6, "a dog likes to bite"_sd},
+ {"path.some.path"_sd, 7, "a dog likes to sleep"_sd},
+ {"path.some.path"_sd, 8, "a dog likes to eat"_sd},
+ {"path.some.path"_sd, 9, "a dog likes to walk"_sd},
+ {"path.some.path"_sd, 10, "a dog likes to play"_sd},
+ {"path.some.path"_sd, 11, "a dog likes to drool"_sd},
+ {"path.some.path"_sd, 12, "a dog likes to sniff"_sd},
+ {"path.some.path"_sd, 13, "a dog likes to snuggle"_sd},
+ {"path.some.path"_sd, 14, "a dog likes to go to the park with other dogs"_sd},
+
+ {"path.some.otherpath"_sd, 1, "life is understood backwards but lived forwards"_sd},
+ {"path.some.otherpath"_sd, 2, "the key is not spending time but investing it"_sd},
+ {"path.some.otherpath"_sd, 3, "if we take care of the moments, the years"_sd},
+ {"path.some.otherpath"_sd, 4, "time is an illusion"_sd},
+ {"path.some.otherpath"_sd, 5, "life and times"_sd},
+ {"path.some.otherpath"_sd, 667, "how many more times"_sd},
+
+ {"still.some.otherpath"_sd, 667, "islands in the stream"_sd},
+ {"still.some.otherpath"_sd, 668, "the gambler"_sd},
+ {"still.some.otherpath"_sd, 669, "coward of the county"_sd},
+ {"still.some.otherpath"_sd, 662, "lady"_sd},
+ {"still.some.otherpath"_sd, 661, "we've got tonight"_sd},
+ {"still.some.otherpath"_sd, 660, "through the years"_sd},
+};
+
+TEST_F(ColumnIndexConsistencyTest, VerifyHashFunction) {
+ stdx::unordered_set<int64_t> hashes;
+ for (const auto& v : kEntries) {
+ hashes.insert(ColumnIndexConsistency::_hash(v));
+ }
+ ASSERT_EQ(hashes.size(), 50u);
+}
+
+TEST_F(ColumnIndexConsistencyTest, VerifyEntriesEqual) {
+ ColumnIndexConsistency cic = createColumnStoreConsistency();
+
+ for (const auto& v : kEntries) {
+ cic.addDocEntry(v);
+ }
+ for (const auto& v : kEntries) {
+ cic.addIndexEntry(v);
+ }
+ ASSERT_FALSE(cic.haveEntryMismatch());
+ ASSERT_EQ(cic.getNumDocs(), cic.getTotalIndexKeys());
+}
+
+TEST_F(ColumnIndexConsistencyTest, VerifyEntriesNotEqual) {
+ ColumnIndexConsistency cic = createColumnStoreConsistency();
+
+ for (const auto& v : kEntries) {
+ cic.addDocEntry(v);
+ }
+ cic.addDocEntry({"path444"_sd, 4, "value1"_sd});
+ for (const auto& v : kEntries) {
+ cic.addIndexEntry(v);
+ }
+ cic.addIndexEntry({"path444"_sd, 4, "value10"_sd});
+ ASSERT_TRUE(cic.haveEntryMismatch());
+ ASSERT_EQ(cic.getNumDocs(), cic.getTotalIndexKeys());
+}
+
+TEST_F(ColumnIndexConsistencyTest, VerifyUnorderedEntriesEqual) {
+ ColumnIndexConsistency cic = createColumnStoreConsistency();
+
+ for (int i = 0; i < 10000; i++) {
+ cic.addDocEntry({"foo"_sd, i, "bar"_sd});
+ }
+
+ for (int i = 9999; i >= 0; i--) {
+ cic.addIndexEntry({"foo"_sd, i, "bar"_sd});
+ }
+ ASSERT_EQ(cic.getNumDocs(), 10000u);
+ ASSERT_EQ(cic.getNumDocs(), cic.getTotalIndexKeys());
+ ASSERT_FALSE(cic.haveEntryMismatch());
+
+ cic.addDocEntry({"foo"_sd, 5311, "bar"_sd});
+ ASSERT_NE(cic.getNumDocs(), cic.getTotalIndexKeys());
+ ASSERT_TRUE(cic.haveEntryMismatch());
+
+ cic.addIndexEntry({"foo"_sd, 5311, "bar"_sd});
+ ASSERT_EQ(cic.getNumDocs(), cic.getTotalIndexKeys());
+ ASSERT_FALSE(cic.haveEntryMismatch());
+
+ for (const auto& v : kEntries) {
+ cic.addDocEntry(v);
+ }
+ for (auto it = kEntries.crbegin(); it != kEntries.crend(); it++) {
+ cic.addIndexEntry(*it);
+ }
+ ASSERT_FALSE(cic.haveEntryMismatch());
+ ASSERT_EQ(cic.getNumDocs(), cic.getTotalIndexKeys());
+}
+
+TEST_F(ColumnIndexConsistencyTest, VerifyEvenBucketDistribution) {
+ const size_t numBuckets = 1000;
+ ColumnIndexConsistency cic = createColumnStoreConsistency(numBuckets);
+
+ for (int i = 0; i < 10000; i++) {
+ cic.addDocEntry({"foo"_sd, i, "bar"_sd});
+ }
+
+ uint32_t maxValue = 0;
+ for (size_t i = 0; i < numBuckets; i++) {
+ maxValue = std::max(cic.getBucketCount(i), maxValue);
+ }
+ ASSERT_LT(maxValue, uint32_t(40)); // perfect would be 20, so 2x that is our made-up threshold
+}
+
+TEST_F(ColumnIndexConsistencyTest, VerifyEvenBucketDistributionFewerInputs) {
+ // fewer, more varied inputs
+ const size_t numBuckets = 5;
+ ColumnIndexConsistency cic = createColumnStoreConsistency(numBuckets);
+ for (const auto& v : kEntries) {
+ cic.addDocEntry(v);
+ }
+ uint32_t maxValue = 0;
+ for (size_t i = 0; i < numBuckets; i++) {
+ maxValue = std::max(cic.getBucketCount(i), maxValue);
+ }
+ ASSERT_LT(maxValue, uint32_t(40)); // perfect would be 20, 2x that is our made-up threshold
+}
+
+TEST_F(ColumnIndexConsistencyTest, VerifySizeCounters) {
+ const size_t numBuckets = 5;
+ ColumnIndexConsistency cic = createColumnStoreConsistency(numBuckets);
+
+ cic.addDocEntry({""_sd, 1, "b"_sd}); // 9 bytes
+ cic.addDocEntry({"f"_sd, 0, ""_sd}); // 9 bytes
+ cic.addDocEntry({"f"_sd, 1, "b"_sd}); // 10 bytes
+
+ cic.addIndexEntry({""_sd, 1, "b"_sd});
+ cic.addIndexEntry({"f"_sd, 0, ""_sd});
+ cic.addIndexEntry({"f"_sd, 1, "b"_sd});
+
+ size_t totalSize = 0;
+ for (size_t i = 0; i < numBuckets; i++) {
+ totalSize += cic.getBucketSizeBytes(i);
+ }
+ ASSERT_EQ(totalSize, 112u); // 28 * 2 (hashed twice) * 2 (hash doc + index) = 112
+}
+
+} // namespace
+} // namespace mongo
diff --git a/src/mongo/db/catalog/database_impl.cpp b/src/mongo/db/catalog/database_impl.cpp
index 54d9b3fd930..47416142ee4 100644
--- a/src/mongo/db/catalog/database_impl.cpp
+++ b/src/mongo/db/catalog/database_impl.cpp
@@ -57,6 +57,7 @@
#include "mongo/db/query/query_knobs_gen.h"
#include "mongo/db/repl/drop_pending_collection_reaper.h"
#include "mongo/db/repl/oplog.h"
+#include "mongo/db/repl/replication_consistency_markers_impl.h"
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/s/collection_sharding_state.h"
#include "mongo/db/s/database_sharding_state.h"
@@ -749,6 +750,21 @@ Collection* DatabaseImpl::createVirtualCollection(OperationContext* opCtx,
vopts);
}
+/**
+ * Some system collections (namely, 'config.transactions' and
+ * 'local.replset.oplogTruncateAfterPoint') are special internal collections that are written to
+ * without updating indexes, so there's no value in creating an index on them.
+ *
+ * @return true if modifications to the collection's data lead to updates of the indexes defined
+ * on it.
+ */
+bool doesCollectionModificationsUpdateIndexes(StringData collName) {
+ // TODO SERVER-72496: investigate whether 'config.transactions' should remain here or not.
+ return collName != "config.transactions" &&
+ collName !=
+ repl::ReplicationConsistencyMarkersImpl::kDefaultOplogTruncateAfterPointNamespace;
+}
+
Collection* DatabaseImpl::_createCollection(
OperationContext* opCtx,
const NamespaceString& nss,
@@ -856,7 +872,8 @@ Collection* DatabaseImpl::_createCollection(
opCtx,
collection,
!idIndex.isEmpty() ? idIndex : ic->getDefaultIdIndexSpec(collection)));
- createColumnIndex = createColumnIndexOnAllCollections.shouldFail();
+ createColumnIndex = createColumnIndexOnAllCollections.shouldFail() &&
+ doesCollectionModificationsUpdateIndexes(nss.ns());
} else {
// autoIndexId: false is only allowed on unreplicated collections.
uassert(50001,
diff --git a/src/mongo/db/catalog/index_consistency.cpp b/src/mongo/db/catalog/index_consistency.cpp
index 40dec646b08..f16bb7ebfa3 100644
--- a/src/mongo/db/catalog/index_consistency.cpp
+++ b/src/mongo/db/catalog/index_consistency.cpp
@@ -43,17 +43,22 @@
#include "mongo/db/index/index_descriptor.h"
#include "mongo/db/multi_key_path_tracker.h"
#include "mongo/db/record_id_helpers.h"
+#include "mongo/db/storage/execution_context.h"
#include "mongo/logv2/log.h"
#include "mongo/util/string_map.h"
+#include "mongo/util/testing_proctor.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kStorage
namespace mongo {
+const long long IndexConsistency::kInterruptIntervalNumRecords = 4096;
+const size_t IndexConsistency::kNumHashBuckets = 1U << 16;
+
namespace {
-const size_t kNumHashBuckets = 1U << 16;
+MONGO_FAIL_POINT_DEFINE(crashOnMultikeyValidateFailure);
StringSet::hasher hash;
@@ -95,18 +100,31 @@ IndexEntryInfo::IndexEntryInfo(const IndexInfo& indexInfo,
keyString(entryKeyString) {}
IndexConsistency::IndexConsistency(OperationContext* opCtx,
- CollectionValidation::ValidateState* validateState)
+ CollectionValidation::ValidateState* validateState,
+ const size_t numHashBuckets)
: _validateState(validateState), _firstPhase(true) {
- _indexKeyBuckets.resize(kNumHashBuckets);
+ _indexKeyBuckets.resize(numHashBuckets);
+}
+
+void IndexConsistency::setSecondPhase() {
+ invariant(_firstPhase);
+ _firstPhase = false;
+}
+KeyStringIndexConsistency::KeyStringIndexConsistency(
+ OperationContext* opCtx,
+ CollectionValidation::ValidateState* validateState,
+ const size_t numHashBuckets)
+ : IndexConsistency(opCtx, validateState, numHashBuckets) {
for (const auto& index : _validateState->getIndexes()) {
- const IndexDescriptor* descriptor = index->descriptor();
+ const auto descriptor = index->descriptor();
IndexAccessMethod* accessMethod = const_cast<IndexAccessMethod*>(index->accessMethod());
_indexesInfo.emplace(descriptor->indexName(), IndexInfo(descriptor, accessMethod));
}
}
-void IndexConsistency::addMultikeyMetadataPath(const KeyString::Value& ks, IndexInfo* indexInfo) {
+void KeyStringIndexConsistency::addMultikeyMetadataPath(const KeyString::Value& ks,
+ IndexInfo* indexInfo) {
auto hash = _hashKeyString(ks, indexInfo->indexNameHash);
if (MONGO_unlikely(_validateState->extraLoggingForTest())) {
LOGV2(6208500,
@@ -117,8 +135,8 @@ void IndexConsistency::addMultikeyMetadataPath(const KeyString::Value& ks, Index
indexInfo->hashedMultikeyMetadataPaths.emplace(hash);
}
-void IndexConsistency::removeMultikeyMetadataPath(const KeyString::Value& ks,
- IndexInfo* indexInfo) {
+void KeyStringIndexConsistency::removeMultikeyMetadataPath(const KeyString::Value& ks,
+ IndexInfo* indexInfo) {
auto hash = _hashKeyString(ks, indexInfo->indexNameHash);
if (MONGO_unlikely(_validateState->extraLoggingForTest())) {
LOGV2(6208501,
@@ -129,23 +147,18 @@ void IndexConsistency::removeMultikeyMetadataPath(const KeyString::Value& ks,
indexInfo->hashedMultikeyMetadataPaths.erase(hash);
}
-size_t IndexConsistency::getMultikeyMetadataPathCount(IndexInfo* indexInfo) {
+size_t KeyStringIndexConsistency::getMultikeyMetadataPathCount(IndexInfo* indexInfo) {
return indexInfo->hashedMultikeyMetadataPaths.size();
}
-bool IndexConsistency::haveEntryMismatch() const {
+bool KeyStringIndexConsistency::haveEntryMismatch() const {
return std::any_of(_indexKeyBuckets.begin(),
_indexKeyBuckets.end(),
[](const IndexKeyBucket& bucket) -> bool { return bucket.indexKeyCount; });
}
-void IndexConsistency::setSecondPhase() {
- invariant(_firstPhase);
- _firstPhase = false;
-}
-
-void IndexConsistency::repairMissingIndexEntries(OperationContext* opCtx,
- ValidateResults* results) {
+void KeyStringIndexConsistency::repairIndexEntries(OperationContext* opCtx,
+ ValidateResults* results) {
invariant(_validateState->getIndexes().size() > 0);
std::shared_ptr<const IndexCatalogEntry> index = _validateState->getIndexes().front();
for (auto it = _missingIndexEntries.begin(); it != _missingIndexEntries.end();) {
@@ -194,7 +207,8 @@ void IndexConsistency::repairMissingIndexEntries(OperationContext* opCtx,
}
}
-void IndexConsistency::addIndexEntryErrors(ValidateResults* results) {
+void KeyStringIndexConsistency::addIndexEntryErrors(OperationContext* opCtx,
+ ValidateResults* results) {
invariant(!_firstPhase);
// We'll report up to 1MB for extra index entry errors and missing index entry errors.
@@ -290,8 +304,8 @@ void IndexConsistency::addIndexEntryErrors(ValidateResults* results) {
}
}
-void IndexConsistency::addDocumentMultikeyPaths(IndexInfo* indexInfo,
- const MultikeyPaths& newPaths) {
+void KeyStringIndexConsistency::addDocumentMultikeyPaths(IndexInfo* indexInfo,
+ const MultikeyPaths& newPaths) {
invariant(newPaths.size());
if (indexInfo->docMultikeyPaths.size()) {
MultikeyPathTracker::mergeMultikeyPaths(&indexInfo->docMultikeyPaths, newPaths);
@@ -301,10 +315,10 @@ void IndexConsistency::addDocumentMultikeyPaths(IndexInfo* indexInfo,
}
}
-void IndexConsistency::addDocKey(OperationContext* opCtx,
- const KeyString::Value& ks,
- IndexInfo* indexInfo,
- const RecordId& recordId) {
+void KeyStringIndexConsistency::addDocKey(OperationContext* opCtx,
+ const KeyString::Value& ks,
+ IndexInfo* indexInfo,
+ const RecordId& recordId) {
auto rawHash = ks.hash(indexInfo->indexNameHash);
auto hashLower = rawHash % kNumHashBuckets;
auto hashUpper = (rawHash / kNumHashBuckets) % kNumHashBuckets;
@@ -358,11 +372,11 @@ void IndexConsistency::addDocKey(OperationContext* opCtx,
}
}
-void IndexConsistency::addIndexKey(OperationContext* opCtx,
- const KeyString::Value& ks,
- IndexInfo* indexInfo,
- const RecordId& recordId,
- ValidateResults* results) {
+void KeyStringIndexConsistency::addIndexKey(OperationContext* opCtx,
+ const KeyString::Value& ks,
+ IndexInfo* indexInfo,
+ const RecordId& recordId,
+ ValidateResults* results) {
auto rawHash = ks.hash(indexInfo->indexNameHash);
auto hashLower = rawHash % kNumHashBuckets;
auto hashUpper = (rawHash / kNumHashBuckets) % kNumHashBuckets;
@@ -442,7 +456,7 @@ void IndexConsistency::addIndexKey(OperationContext* opCtx,
}
}
-bool IndexConsistency::limitMemoryUsageForSecondPhase(ValidateResults* result) {
+bool KeyStringIndexConsistency::limitMemoryUsageForSecondPhase(ValidateResults* result) {
invariant(!_firstPhase);
const uint32_t maxMemoryUsageBytes = maxValidateMemoryUsageMB.load() * 1024 * 1024;
@@ -508,11 +522,436 @@ bool IndexConsistency::limitMemoryUsageForSecondPhase(ValidateResults* result) {
return true;
}
-BSONObj IndexConsistency::_generateInfo(const std::string& indexName,
- const BSONObj& keyPattern,
- const RecordId& recordId,
- const BSONObj& indexKey,
- const BSONObj& idKey) {
+void KeyStringIndexConsistency::validateIndexKeyCount(OperationContext* opCtx,
+ const IndexCatalogEntry* index,
+ long long* numRecords,
+ IndexValidateResults& results) {
+ // Fetch the total number of index entries we previously found traversing the index.
+ const IndexDescriptor* desc = index->descriptor();
+ const std::string indexName = desc->indexName();
+ IndexInfo* indexInfo = &this->getIndexInfo(indexName);
+ const auto numTotalKeys = indexInfo->numKeys;
+
+    // Update numRecords by subtracting the number of records removed from the record store in
+    // repair mode when validating index consistency.
+ (*numRecords) -= results.keysRemovedFromRecordStore;
+
+ // Do not fail on finding too few index entries compared to collection entries when full:false.
+ bool hasTooFewKeys = false;
+ const bool noErrorOnTooFewKeys = !_validateState->isFullIndexValidation();
+
+ if (desc->isIdIndex() && numTotalKeys != (*numRecords)) {
+ hasTooFewKeys = (numTotalKeys < (*numRecords));
+ const std::string msg = str::stream()
+ << "number of _id index entries (" << numTotalKeys
+ << ") does not match the number of documents in the index (" << (*numRecords) << ")";
+ if (noErrorOnTooFewKeys && (numTotalKeys < (*numRecords))) {
+ results.warnings.push_back(msg);
+ } else {
+ results.errors.push_back(msg);
+ results.valid = false;
+ }
+ }
+
+ // Hashed indexes may never be multikey.
+ if (desc->getAccessMethodName() == IndexNames::HASHED &&
+ index->isMultikey(opCtx, _validateState->getCollection())) {
+ results.errors.push_back(str::stream() << "Hashed index is incorrectly marked multikey: "
+ << desc->indexName());
+ results.valid = false;
+ }
+
+ // Confirm that the number of index entries is not greater than the number of documents in the
+ // collection. This check is only valid for indexes that are not multikey (indexed arrays
+ // produce an index key per array entry) and not $** indexes which can produce index keys for
+ // multiple paths within a single document.
+ if (results.valid && !index->isMultikey(opCtx, _validateState->getCollection()) &&
+ desc->getIndexType() != IndexType::INDEX_WILDCARD && numTotalKeys > (*numRecords)) {
+ const std::string err = str::stream()
+ << "index " << desc->indexName() << " is not multi-key, but has more entries ("
+ << numTotalKeys << ") than documents in the index (" << (*numRecords) << ")";
+ results.errors.push_back(err);
+ results.valid = false;
+ }
+
+ // Ignore any indexes with a special access method. If an access method name is given, the
+ // index may be a full text, geo or special index plugin with different semantics.
+ if (results.valid && !desc->isSparse() && !desc->isPartial() && !desc->isIdIndex() &&
+ desc->getAccessMethodName() == "" && numTotalKeys < (*numRecords)) {
+ hasTooFewKeys = true;
+ const std::string msg = str::stream()
+ << "index " << desc->indexName() << " is not sparse or partial, but has fewer entries ("
+ << numTotalKeys << ") than documents in the index (" << (*numRecords) << ")";
+ if (noErrorOnTooFewKeys) {
+ results.warnings.push_back(msg);
+ } else {
+ results.errors.push_back(msg);
+ results.valid = false;
+ }
+ }
+
+ if (!_validateState->isFullIndexValidation() && hasTooFewKeys) {
+ const std::string warning = str::stream()
+ << "index " << desc->indexName() << " has fewer keys than records."
+ << " Please re-run the validate command with {full: true}";
+ results.warnings.push_back(warning);
+ }
+}
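
To make the branches above concrete with hypothetical numbers: an _id index with numTotalKeys = 97 against *numRecords = 100 yields a warning under a non-full validation (too few keys are tolerated there) but an error with {full: true}; a non-multikey, non-wildcard index with 103 keys over the same 100 records is an error in either mode, since only indexed arrays or $** paths can legitimately produce more keys than documents.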
+
+namespace {
+// Ensures that index entries are in increasing or decreasing order.
+void _validateKeyOrder(OperationContext* opCtx,
+ const IndexCatalogEntry* index,
+ const KeyString::Value& currKey,
+ const KeyString::Value& prevKey,
+ IndexValidateResults* results) {
+ const auto descriptor = index->descriptor();
+ const bool unique = descriptor->unique();
+
+ // KeyStrings will be in strictly increasing order because all keys are sorted and they are in
+ // the format (Key, RID), and all RecordIDs are unique.
+ if (currKey.compare(prevKey) <= 0) {
+ if (results && results->valid) {
+ results->errors.push_back(str::stream()
+ << "index '" << descriptor->indexName()
+ << "' is not in strictly ascending or descending order");
+ }
+ if (results) {
+ results->valid = false;
+ }
+ return;
+ }
+
+ if (unique) {
+ // Unique indexes must not have duplicate keys.
+ const int cmp = currKey.compareWithoutRecordIdLong(prevKey);
+ if (cmp != 0) {
+ return;
+ }
+
+ if (results && results->valid) {
+ const auto bsonKey =
+ KeyString::toBson(currKey, Ordering::make(descriptor->keyPattern()));
+ const auto firstRecordId =
+ KeyString::decodeRecordIdLongAtEnd(prevKey.getBuffer(), prevKey.getSize());
+ const auto secondRecordId =
+ KeyString::decodeRecordIdLongAtEnd(currKey.getBuffer(), currKey.getSize());
+ results->errors.push_back(str::stream() << "Unique index '" << descriptor->indexName()
+ << "' has duplicate key: " << bsonKey
+ << ", first record: " << firstRecordId
+ << ", second record: " << secondRecordId);
+ }
+ if (results) {
+ results->valid = false;
+ }
+ }
+}
+} // namespace
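
A worked reading of the ordering contract above: because each KeyString carries its RecordId suffix, entries for the same key {a: 1} at RecordId(5) and RecordId(9) still compare strictly increasing, so the first check fires only on a byte-level tie or regression; for a unique index, compareWithoutRecordIdLong() then strips that suffix, so the same pair would be reported as a duplicate key naming both records.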
+
+int64_t KeyStringIndexConsistency::traverseIndex(OperationContext* opCtx,
+ const IndexCatalogEntry* index,
+ ProgressMeterHolder& _progress,
+ ValidateResults* results) {
+ const auto descriptor = index->descriptor();
+ const auto indexName = descriptor->indexName();
+ auto& indexResults = results->indexResultsMap[indexName];
+ IndexInfo& indexInfo = this->getIndexInfo(indexName);
+ int64_t numKeys = 0;
+
+ bool isFirstEntry = true;
+
+ const KeyString::Version version =
+ index->accessMethod()->asSortedData()->getSortedDataInterface()->getKeyStringVersion();
+
+ KeyString::Builder firstKeyStringBuilder(
+ version, BSONObj(), indexInfo.ord, KeyString::Discriminator::kExclusiveBefore);
+ const KeyString::Value firstKeyString = firstKeyStringBuilder.getValueCopy();
+ KeyString::Value prevIndexKeyStringValue;
+
+ // Ensure that this index has an open index cursor.
+ const auto indexCursorIt = _validateState->getIndexCursors().find(indexName);
+ invariant(indexCursorIt != _validateState->getIndexCursors().end());
+
+ const std::unique_ptr<SortedDataInterfaceThrottleCursor>& indexCursor = indexCursorIt->second;
+
+ boost::optional<KeyStringEntry> indexEntry;
+ try {
+ indexEntry = indexCursor->seekForKeyString(opCtx, firstKeyString);
+ } catch (const DBException& ex) {
+ if (TestingProctor::instance().isEnabled() && ex.code() != ErrorCodes::WriteConflict) {
+ LOGV2_FATAL(5318400,
+ "Error seeking to first key",
+ "error"_attr = ex.toString(),
+ "index"_attr = indexName,
+ "key"_attr = firstKeyString.toString());
+ }
+ throw;
+ }
+
+ const auto keyFormat =
+ index->accessMethod()->asSortedData()->getSortedDataInterface()->rsKeyFormat();
+ const RecordId kWildcardMultikeyMetadataRecordId = record_id_helpers::reservedIdFor(
+ record_id_helpers::ReservationId::kWildcardMultikeyMetadataId, keyFormat);
+
+ // Warn about unique indexes with keys in old format (without record id).
+ bool foundOldUniqueIndexKeys = false;
+
+ while (indexEntry) {
+ if (!isFirstEntry) {
+ _validateKeyOrder(
+ opCtx, index, indexEntry->keyString, prevIndexKeyStringValue, &indexResults);
+ }
+
+ if (!foundOldUniqueIndexKeys && !descriptor->isIdIndex() && descriptor->unique() &&
+ !indexCursor->isRecordIdAtEndOfKeyString()) {
+ results->warnings.push_back(
+ fmt::format("Unique index {} has one or more keys in the old format (without "
+ "embedded record id). First record: {}",
+ indexInfo.indexName,
+ indexEntry->loc.toString()));
+ foundOldUniqueIndexKeys = true;
+ }
+
+ const bool isMetadataKey = indexEntry->loc == kWildcardMultikeyMetadataRecordId;
+ if (descriptor->getIndexType() == IndexType::INDEX_WILDCARD && isMetadataKey) {
+ this->removeMultikeyMetadataPath(indexEntry->keyString, &indexInfo);
+ } else {
+ try {
+ this->addIndexKey(
+ opCtx, indexEntry->keyString, &indexInfo, indexEntry->loc, results);
+ } catch (const DBException& e) {
+ StringBuilder ss;
+ ss << "Parsing index key for " << indexInfo.indexName << " recId "
+ << indexEntry->loc << " threw exception " << e.toString();
+ results->errors.push_back(ss.str());
+ results->valid = false;
+ }
+ }
+ {
+ stdx::unique_lock<Client> lk(*opCtx->getClient());
+ _progress.get(lk)->hit();
+ }
+ numKeys++;
+ isFirstEntry = false;
+ prevIndexKeyStringValue = indexEntry->keyString;
+
+ if (numKeys % kInterruptIntervalNumRecords == 0) {
+ // Periodically checks for interrupts and yields.
+ opCtx->checkForInterrupt();
+ _validateState->yield(opCtx);
+ }
+
+ try {
+ indexEntry = indexCursor->nextKeyString(opCtx);
+ } catch (const DBException& ex) {
+ if (TestingProctor::instance().isEnabled() && ex.code() != ErrorCodes::WriteConflict) {
+ LOGV2_FATAL(5318401,
+ "Error advancing index cursor",
+ "error"_attr = ex.toString(),
+ "index"_attr = indexName,
+ "prevKey"_attr = prevIndexKeyStringValue.toString());
+ }
+ throw;
+ }
+ }
+
+ if (results && this->getMultikeyMetadataPathCount(&indexInfo) > 0) {
+ results->errors.push_back(str::stream()
+ << "Index '" << descriptor->indexName()
+ << "' has one or more missing multikey metadata index keys");
+ results->valid = false;
+ }
+
+ // Adjust multikey metadata when allowed. These states are all allowed by the design of
+ // multikey. A collection should still be valid without these adjustments.
+ if (_validateState->adjustMultikey()) {
+
+ // If this collection has documents that make this index multikey, then check whether those
+ // multikey paths match the index's metadata.
+ const auto indexPaths = index->getMultikeyPaths(opCtx, _validateState->getCollection());
+ const auto& documentPaths = indexInfo.docMultikeyPaths;
+ if (indexInfo.multikeyDocs && documentPaths != indexPaths) {
+ LOGV2(5367500,
+ "Index's multikey paths do not match those of its documents",
+ "index"_attr = descriptor->indexName(),
+ "indexPaths"_attr = MultikeyPathTracker::dumpMultikeyPaths(indexPaths),
+ "documentPaths"_attr = MultikeyPathTracker::dumpMultikeyPaths(documentPaths));
+
+ // Since we have the correct multikey path information for this index, we can tighten up
+ // its metadata to improve query performance. This may apply in two distinct scenarios:
+ // 1. Collection data has changed such that the current multikey paths on the index
+ // are too permissive and certain document paths are no longer multikey.
+ // 2. This index was built before 3.4, and there is no multikey path information for
+ // the index. We can effectively 'upgrade' the index so that it does not need to be
+ // rebuilt to update this information.
+ writeConflictRetry(opCtx, "updateMultikeyPaths", _validateState->nss().ns(), [&]() {
+ WriteUnitOfWork wuow(opCtx);
+ auto writeableIndex = const_cast<IndexCatalogEntry*>(index);
+ const bool isMultikey = true;
+ writeableIndex->forceSetMultikey(
+ opCtx, _validateState->getCollection(), isMultikey, documentPaths);
+ wuow.commit();
+ });
+
+ if (results) {
+ results->warnings.push_back(str::stream() << "Updated index multikey metadata"
+ << ": " << descriptor->indexName());
+ results->repaired = true;
+ }
+ }
+
+ // If this index does not need to be multikey, then unset the flag.
+ if (index->isMultikey(opCtx, _validateState->getCollection()) && !indexInfo.multikeyDocs) {
+ invariant(!indexInfo.docMultikeyPaths.size());
+
+ LOGV2(5367501,
+ "Index is multikey but there are no multikey documents",
+ "index"_attr = descriptor->indexName());
+
+ // This makes an improvement in the case that no documents make the index multikey and
+ // the flag can be unset entirely. This may be due to a change in the data or historical
+            // multikey bugs that have persisted incorrect multikey information.
+ writeConflictRetry(opCtx, "unsetMultikeyPaths", _validateState->nss().ns(), [&]() {
+ WriteUnitOfWork wuow(opCtx);
+ auto writeableIndex = const_cast<IndexCatalogEntry*>(index);
+ const bool isMultikey = false;
+ writeableIndex->forceSetMultikey(
+ opCtx, _validateState->getCollection(), isMultikey, {});
+ wuow.commit();
+ });
+
+ if (results) {
+ results->warnings.push_back(str::stream() << "Unset index multikey metadata"
+ << ": " << descriptor->indexName());
+ results->repaired = true;
+ }
+ }
+ }
+
+ return numKeys;
+}
+
+void KeyStringIndexConsistency::traverseRecord(OperationContext* opCtx,
+ const CollectionPtr& coll,
+ const IndexCatalogEntry* index,
+ const RecordId& recordId,
+ const BSONObj& recordBson,
+ ValidateResults* results) {
+ const auto iam = index->accessMethod()->asSortedData();
+
+ const auto descriptor = index->descriptor();
+ SharedBufferFragmentBuilder pool(KeyString::HeapBuilder::kHeapAllocatorDefaultBytes);
+ auto& executionCtx = StorageExecutionContext::get(opCtx);
+
+ const auto documentKeySet = executionCtx.keys();
+ const auto multikeyMetadataKeys = executionCtx.multikeyMetadataKeys();
+ const auto documentMultikeyPaths = executionCtx.multikeyPaths();
+
+ iam->getKeys(opCtx,
+ coll,
+ pool,
+ recordBson,
+ InsertDeleteOptions::ConstraintEnforcementMode::kEnforceConstraints,
+ SortedDataIndexAccessMethod::GetKeysContext::kAddingKeys,
+ documentKeySet.get(),
+ multikeyMetadataKeys.get(),
+ documentMultikeyPaths.get(),
+ recordId);
+
+ const bool shouldBeMultikey =
+ iam->shouldMarkIndexAsMultikey(documentKeySet->size(),
+ {multikeyMetadataKeys->begin(), multikeyMetadataKeys->end()},
+ *documentMultikeyPaths);
+
+ if (!index->isMultikey(opCtx, coll) && shouldBeMultikey) {
+ if (_validateState->fixErrors()) {
+ writeConflictRetry(opCtx, "setIndexAsMultikey", coll->ns().ns(), [&] {
+ WriteUnitOfWork wuow(opCtx);
+ coll->getIndexCatalog()->setMultikeyPaths(
+ opCtx, coll, descriptor, *multikeyMetadataKeys, *documentMultikeyPaths);
+ wuow.commit();
+ });
+
+ LOGV2(4614700,
+ "Index set to multikey",
+ "indexName"_attr = descriptor->indexName(),
+ "collection"_attr = coll->ns().ns());
+ results->warnings.push_back(str::stream() << "Index " << descriptor->indexName()
+ << " set to multikey.");
+ results->repaired = true;
+ } else {
+ auto& curRecordResults = (results->indexResultsMap)[descriptor->indexName()];
+ const std::string msg = fmt::format(
+ "Index {} is not multikey but has more than one key in document with "
+ "RecordId({}) and {}",
+ descriptor->indexName(),
+ recordId.toString(),
+ recordBson.getField("_id").toString());
+ curRecordResults.errors.push_back(msg);
+ curRecordResults.valid = false;
+ if (crashOnMultikeyValidateFailure.shouldFail()) {
+ invariant(false, msg);
+ }
+ }
+ }
+
+ if (index->isMultikey(opCtx, coll)) {
+ const MultikeyPaths& indexPaths = index->getMultikeyPaths(opCtx, coll);
+ if (!MultikeyPathTracker::covers(indexPaths, *documentMultikeyPaths.get())) {
+ if (_validateState->fixErrors()) {
+ writeConflictRetry(opCtx, "increaseMultikeyPathCoverage", coll->ns().ns(), [&] {
+ WriteUnitOfWork wuow(opCtx);
+ coll->getIndexCatalog()->setMultikeyPaths(
+ opCtx, coll, descriptor, *multikeyMetadataKeys, *documentMultikeyPaths);
+ wuow.commit();
+ });
+
+ LOGV2(4614701,
+ "Multikey paths updated to cover multikey document",
+ "indexName"_attr = descriptor->indexName(),
+ "collection"_attr = coll->ns().ns());
+ results->warnings.push_back(str::stream() << "Index " << descriptor->indexName()
+ << " multikey paths updated.");
+ results->repaired = true;
+ } else {
+ const std::string msg = fmt::format(
+ "Index {} multikey paths do not cover a document with RecordId({}) and {}",
+ descriptor->indexName(),
+ recordId.toString(),
+ recordBson.getField("_id").toString());
+ auto& curRecordResults = (results->indexResultsMap)[descriptor->indexName()];
+ curRecordResults.errors.push_back(msg);
+ curRecordResults.valid = false;
+ }
+ }
+ }
+
+ IndexInfo& indexInfo = this->getIndexInfo(descriptor->indexName());
+ if (shouldBeMultikey) {
+ indexInfo.multikeyDocs = true;
+ }
+
+ // An empty set of multikey paths indicates that this index does not track path-level
+ // multikey information and we should do no tracking.
+ if (shouldBeMultikey && documentMultikeyPaths->size()) {
+ this->addDocumentMultikeyPaths(&indexInfo, *documentMultikeyPaths);
+ }
+
+ for (const auto& keyString : *multikeyMetadataKeys) {
+ this->addMultikeyMetadataPath(keyString, &indexInfo);
+ }
+
+ for (const auto& keyString : *documentKeySet) {
+ _totalIndexKeys++;
+ this->addDocKey(opCtx, keyString, &indexInfo, recordId);
+ }
+}
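
As a concrete illustration of the repair path above: if a document {a: [1, 2]} generates two keys for an index {a: 1} that is not yet marked multikey, shouldBeMultikey comes back true; when _validateState->fixErrors() is set, validate marks the index multikey inside a WriteUnitOfWork and records a warning plus repaired = true, and otherwise it records a per-index error naming the offending RecordId.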
+
+BSONObj KeyStringIndexConsistency::_generateInfo(const std::string& indexName,
+ const BSONObj& keyPattern,
+ const RecordId& recordId,
+ const BSONObj& indexKey,
+ const BSONObj& idKey) {
// We need to rehydrate the indexKey for improved readability.
// {"": ObjectId(...)} -> {"_id": ObjectId(...)}
@@ -543,8 +982,8 @@ BSONObj IndexConsistency::_generateInfo(const std::string& indexName,
return infoBuilder.obj();
}
-uint32_t IndexConsistency::_hashKeyString(const KeyString::Value& ks,
- uint32_t indexNameHash) const {
+uint32_t KeyStringIndexConsistency::_hashKeyString(const KeyString::Value& ks,
+ const uint32_t indexNameHash) const {
return ks.hash(indexNameHash);
}
} // namespace mongo
diff --git a/src/mongo/db/catalog/index_consistency.h b/src/mongo/db/catalog/index_consistency.h
index ade97d20918..f873e83addf 100644
--- a/src/mongo/db/catalog/index_consistency.h
+++ b/src/mongo/db/catalog/index_consistency.h
@@ -32,6 +32,7 @@
#include "mongo/bson/simple_bsonobj_comparator.h"
#include "mongo/db/catalog/validate_state.h"
#include "mongo/db/storage/key_string.h"
+#include "mongo/util/progress_meter.h"
namespace mongo {
@@ -69,8 +70,7 @@ struct IndexInfo {
};
/**
- * Used by _missingIndexEntries to be able to easily access keyString during
- * repairMissingIndexEntries.
+ * Used by _missingIndexEntries to be able to easily access keyString during repairIndexEntries.
*/
struct IndexEntryInfo {
IndexEntryInfo(const IndexInfo& indexInfo,
@@ -85,90 +85,116 @@ struct IndexEntryInfo {
KeyString::Value keyString;
};
+
/**
- * The IndexConsistency class is used to keep track of the index consistency.
- * It does this by using the index keys from index entries and index keys generated from the
- * document to ensure there is a one-to-one mapping for each key.
- * In addition, an IndexObserver class can be hooked into the IndexAccessMethod to inform
- * this class about changes to the indexes during a validation and compensate for them.
+ * The IndexConsistency class is the base class for the index-consistency sub-classes. Its base
+ * implementation keeps track of index consistency by using the index keys from index entries
+ * and index keys
+ * generated from the document to ensure there is a one-to-one mapping for each key.
*/
-class IndexConsistency final {
+class IndexConsistency {
using IndexInfoMap = std::map<std::string, IndexInfo>;
using IndexKey = std::pair<std::string, std::string>;
public:
- IndexConsistency(OperationContext* opCtx, CollectionValidation::ValidateState* validateState);
+ static const long long kInterruptIntervalNumRecords;
+ static const size_t kNumHashBuckets;
- /**
- * During the first phase of validation, given the document's key KeyString, increment the
- * corresponding `_indexKeyCount` by hashing it.
- * For the second phase of validation, keep track of the document keys that hashed to
- * inconsistent hash buckets during the first phase of validation.
- */
- void addDocKey(OperationContext* opCtx,
- const KeyString::Value& ks,
- IndexInfo* indexInfo,
- const RecordId& recordId);
+ IndexConsistency(OperationContext* opCtx,
+ CollectionValidation::ValidateState* validateState,
+ size_t numHashBuckets = kNumHashBuckets);
/**
- * During the first phase of validation, given the index entry's KeyString, decrement the
- * corresponding `_indexKeyCount` by hashing it.
- * For the second phase of validation, try to match the index entry keys that hashed to
- * inconsistent hash buckets during the first phase of validation to document keys.
+ * Informs the IndexConsistency object that we're advancing to the second phase of
+ * index validation.
*/
- void addIndexKey(OperationContext* opCtx,
- const KeyString::Value& ks,
- IndexInfo* indexInfo,
- const RecordId& recordId,
- ValidateResults* results);
+ void setSecondPhase();
- /**
- * During the first phase of validation, tracks the multikey paths for every observed document.
- */
- void addDocumentMultikeyPaths(IndexInfo* indexInfo, const MultikeyPaths& multikeyPaths);
+ virtual ~IndexConsistency() = default;
- /**
- * To validate $** multikey metadata paths, we first scan the collection and add a hash of all
- * multikey paths encountered to a set. We then scan the index for multikey metadata path
- * entries and remove any path encountered. As we expect the index to contain a super-set of
- * the collection paths, a non-empty set represents an invalid index.
- */
- void addMultikeyMetadataPath(const KeyString::Value& ks, IndexInfo* indexInfo);
- void removeMultikeyMetadataPath(const KeyString::Value& ks, IndexInfo* indexInfo);
- size_t getMultikeyMetadataPathCount(IndexInfo* indexInfo);
+protected:
+ struct IndexKeyBucket {
+ uint32_t indexKeyCount;
+ uint32_t bucketSizeBytes;
+ };
+
+ CollectionValidation::ValidateState* _validateState;
+
+ // We map the hashed KeyString values to a bucket that contains the count of how many
+ // index keys and document keys we've seen in each bucket. This counter is unsigned to avoid
+ // undefined behavior in the (unlikely) case of overflow.
+ // Count rules:
+ // - If the count is non-zero for a bucket after all documents and index entries have been
+ // processed, one or more indexes are inconsistent for KeyStrings that map to it.
+ // Otherwise, those keys are consistent for all indexes with a high degree of confidence.
+    //   - Absent overflow, if a count interpreted as a two's complement integer ends up greater
+ // than zero, there are too few index entries.
+ // - Similarly, if that count ends up less than zero, there are too many index entries.
+
+ std::vector<IndexKeyBucket> _indexKeyBuckets;
+
+ // Whether we're in the first or second phase of index validation.
+ bool _firstPhase;
+
+private:
+ IndexConsistency() = delete;
+}; // IndexConsistency
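
A quick worked example of those counting rules: a bucket that receives three document keys (+3) and two index entries (-2) ends at 1, meaning one missing index entry among the keys hashing there; a bucket reading 0xFFFFFFFF is the unsigned wrap of -1, i.e. one extra index entry. The second phase then re-examines only the keys that land in nonzero buckets to identify the exact offenders.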
+
+/**
+ * The KeyStringIndexConsistency class is used to keep track of the index consistency for
+ * KeyString based indexes. It does this by using the index keys from index entries and index keys
+ * generated from the document to ensure there is a one-to-one mapping for each key. In addition, an
+ * IndexObserver class can be hooked into the IndexAccessMethod to inform this class about changes
+ * to the indexes during a validation and compensate for them.
+ */
+class KeyStringIndexConsistency final : protected IndexConsistency {
+ using IndexInfoMap = std::map<std::string, IndexInfo>;
+ using IndexKey = std::pair<std::string, std::string>;
+
+public:
+ KeyStringIndexConsistency(OperationContext* opCtx,
+ CollectionValidation::ValidateState* validateState,
+ size_t numHashBuckets = kNumHashBuckets);
+
+ void setSecondPhase() {
+ IndexConsistency::setSecondPhase();
+ }
/**
- * Returns true if any value in the `_indexKeyCount` map is not equal to 0, otherwise
- * return false.
+     * Traverses the index via 'cursor' and accumulates the traversal results.
*/
- bool haveEntryMismatch() const;
+ int64_t traverseIndex(OperationContext* opCtx,
+ const IndexCatalogEntry* index,
+ ProgressMeterHolder& _progress,
+ ValidateResults* results);
/**
- * Return info on all indexes tracked by this.
+ * Traverses all paths in a single record from the row-store via the given {'recordId','record'}
+ * pair and accumulates the traversal results.
*/
- IndexInfoMap& getIndexInfo() {
- return _indexesInfo;
- }
- IndexInfo& getIndexInfo(const std::string& indexName) {
- return _indexesInfo.at(indexName);
- }
+ void traverseRecord(OperationContext* opCtx,
+ const CollectionPtr& coll,
+ const IndexCatalogEntry* index,
+ const RecordId& recordId,
+ const BSONObj& recordBson,
+ ValidateResults* results);
/**
- * Informs the IndexConsistency object that we're advancing to the second phase of index
- * validation.
+     * Returns true if any bucket in `_indexKeyBuckets` has a non-zero count, and false
+     * otherwise.
*/
- void setSecondPhase();
+ bool haveEntryMismatch() const;
/**
* If repair mode enabled, try inserting _missingIndexEntries into indexes.
*/
- void repairMissingIndexEntries(OperationContext* opCtx, ValidateResults* results);
+ void repairIndexEntries(OperationContext* opCtx, ValidateResults* results);
/**
* Records the errors gathered from the second phase of index validation into the provided
* ValidateResultsMap and ValidateResults.
*/
- void addIndexEntryErrors(ValidateResults* results);
+ void addIndexEntryErrors(OperationContext* opCtx, ValidateResults* results);
/**
* Sets up this IndexConsistency object to limit memory usage in the second phase of index
@@ -177,35 +203,21 @@ public:
*/
bool limitMemoryUsageForSecondPhase(ValidateResults* result);
-private:
- struct IndexKeyBucket {
- uint32_t indexKeyCount;
- uint32_t bucketSizeBytes;
- };
-
- IndexConsistency() = delete;
+ void validateIndexKeyCount(OperationContext* opCtx,
+ const IndexCatalogEntry* index,
+ long long* numRecords,
+ IndexValidateResults& results);
- CollectionValidation::ValidateState* _validateState;
-
- // We map the hashed KeyString values to a bucket that contains the count of how many
- // index keys and document keys we've seen in each bucket. This counter is unsigned to avoid
- // undefined behavior in the (unlikely) case of overflow.
- // Count rules:
- // - If the count is non-zero for a bucket after all documents and index entries have been
- // processed, one or more indexes are inconsistent for KeyStrings that map to it.
- // Otherwise, those keys are consistent for all indexes with a high degree of confidence.
- // - Absent overflow, if a count interpreted as twos complement integer ends up greater
- // than zero, there are too few index entries.
- // - Similarly, if that count ends up less than zero, there are too many index entries.
+ uint64_t getTotalIndexKeys() {
+ return _totalIndexKeys;
+ }
- std::vector<IndexKeyBucket> _indexKeyBuckets;
+private:
+ KeyStringIndexConsistency() = delete;
// A vector of IndexInfo indexes by index number
IndexInfoMap _indexesInfo;
- // Whether we're in the first or second phase of index validation.
- bool _firstPhase;
-
// Populated during the second phase of validation, this map contains the index entries that
// were pointing at an invalid document key.
// The map contains a IndexKey pointing at a set of BSON objects as there may be multiple
@@ -218,6 +230,55 @@ private:
// index entry for a given IndexKey for each index.
std::map<IndexKey, IndexEntryInfo> _missingIndexEntries;
+ // The total number of index keys is stored during the first validation phase, since this
+ // count may change during a second phase.
+ uint64_t _totalIndexKeys = 0;
+
+ /**
+ * Return info for an index tracked by this with the given 'indexName'.
+ */
+ IndexInfo& getIndexInfo(const std::string& indexName) {
+ return _indexesInfo.at(indexName);
+ }
+
+ /**
+ * During the first phase of validation, given the document's key KeyString, increment the
+ * corresponding `_indexKeyCount` by hashing it.
+ * For the second phase of validation, keep track of the document keys that hashed to
+ * inconsistent hash buckets during the first phase of validation.
+ */
+ void addDocKey(OperationContext* opCtx,
+ const KeyString::Value& ks,
+ IndexInfo* indexInfo,
+ const RecordId& recordId);
+
+ /**
+ * During the first phase of validation, given the index entry's KeyString, decrement the
+ * corresponding `_indexKeyCount` by hashing it.
+ * For the second phase of validation, try to match the index entry keys that hashed to
+ * inconsistent hash buckets during the first phase of validation to document keys.
+ */
+ void addIndexKey(OperationContext* opCtx,
+ const KeyString::Value& ks,
+ IndexInfo* indexInfo,
+ const RecordId& recordId,
+ ValidateResults* results);
+
+ /**
+ * During the first phase of validation, tracks the multikey paths for every observed document.
+ */
+ void addDocumentMultikeyPaths(IndexInfo* indexInfo, const MultikeyPaths& multikeyPaths);
+
+ /**
+ * To validate $** multikey metadata paths, we first scan the collection and add a hash of all
+ * multikey paths encountered to a set. We then scan the index for multikey metadata path
+ * entries and remove any path encountered. As we expect the index to contain a super-set of
+ * the collection paths, a non-empty set represents an invalid index.
+ */
+ void addMultikeyMetadataPath(const KeyString::Value& ks, IndexInfo* indexInfo);
+ void removeMultikeyMetadataPath(const KeyString::Value& ks, IndexInfo* indexInfo);
+ size_t getMultikeyMetadataPathCount(IndexInfo* indexInfo);
+
/**
* Generates a key for the second phase of validation. The keys format is the following:
* {
@@ -241,5 +302,5 @@ private:
*/
uint32_t _hashKeyString(const KeyString::Value& ks, uint32_t indexNameHash) const;
-}; // IndexConsistency
+}; // KeyStringIndexConsistency
} // namespace mongo
diff --git a/src/mongo/db/catalog/validate_adaptor.cpp b/src/mongo/db/catalog/validate_adaptor.cpp
index 64e2f3fe151..035f5be1eb2 100644
--- a/src/mongo/db/catalog/validate_adaptor.cpp
+++ b/src/mongo/db/catalog/validate_adaptor.cpp
@@ -39,11 +39,13 @@
#include "mongo/bson/util/bsoncolumn.h"
#include "mongo/db/catalog/clustered_collection_util.h"
#include "mongo/db/catalog/collection.h"
+#include "mongo/db/catalog/column_index_consistency.h"
#include "mongo/db/catalog/index_catalog.h"
#include "mongo/db/catalog/index_consistency.h"
#include "mongo/db/catalog/throttle_cursor.h"
#include "mongo/db/concurrency/exception_util.h"
#include "mongo/db/curop.h"
+#include "mongo/db/index/columns_access_method.h"
#include "mongo/db/index/index_access_method.h"
#include "mongo/db/index/index_descriptor.h"
#include "mongo/db/index/wildcard_access_method.h"
@@ -71,11 +73,8 @@ namespace mongo {
namespace {
-MONGO_FAIL_POINT_DEFINE(crashOnMultikeyValidateFailure);
-
// Set limit for size of corrupted records that will be reported.
const long long kMaxErrorSizeBytes = 1 * 1024 * 1024;
-const long long kInterruptIntervalNumRecords = 4096;
const long long kInterruptIntervalNumBytes = 50 * 1024 * 1024; // 50MB.
static constexpr const char* kSchemaValidationFailedReason =
@@ -575,373 +574,19 @@ Status ValidateAdaptor::validateRecord(OperationContext* opCtx,
results);
}
- auto& executionCtx = StorageExecutionContext::get(opCtx);
SharedBufferFragmentBuilder pool(KeyString::HeapBuilder::kHeapAllocatorDefaultBytes);
for (const auto& index : _validateState->getIndexes()) {
const IndexDescriptor* descriptor = index->descriptor();
- auto iam = index->accessMethod()->asSortedData();
-
if (descriptor->isPartial() && !index->getFilterExpression()->matchesBSON(recordBson))
continue;
- auto documentKeySet = executionCtx.keys();
- auto multikeyMetadataKeys = executionCtx.multikeyMetadataKeys();
- auto documentMultikeyPaths = executionCtx.multikeyPaths();
-
- iam->getKeys(opCtx,
- coll,
- pool,
- recordBson,
- InsertDeleteOptions::ConstraintEnforcementMode::kEnforceConstraints,
- SortedDataIndexAccessMethod::GetKeysContext::kAddingKeys,
- documentKeySet.get(),
- multikeyMetadataKeys.get(),
- documentMultikeyPaths.get(),
- recordId);
-
- bool shouldBeMultikey = iam->shouldMarkIndexAsMultikey(
- documentKeySet->size(),
- {multikeyMetadataKeys->begin(), multikeyMetadataKeys->end()},
- *documentMultikeyPaths);
-
- if (!index->isMultikey(opCtx, coll) && shouldBeMultikey) {
- if (_validateState->fixErrors()) {
- writeConflictRetry(opCtx, "setIndexAsMultikey", coll->ns().ns(), [&] {
- WriteUnitOfWork wuow(opCtx);
- coll->getIndexCatalog()->setMultikeyPaths(
- opCtx, coll, descriptor, *multikeyMetadataKeys, *documentMultikeyPaths);
- wuow.commit();
- });
-
- LOGV2(4614700,
- "Index set to multikey",
- "indexName"_attr = descriptor->indexName(),
- "collection"_attr = coll->ns().ns());
- results->warnings.push_back(str::stream() << "Index " << descriptor->indexName()
- << " set to multikey.");
- results->repaired = true;
- } else {
- auto& curRecordResults = (results->indexResultsMap)[descriptor->indexName()];
- std::string msg = fmt::format(
- "Index {} is not multikey but has more than one key in document with "
- "RecordId({}) and {}",
- descriptor->indexName(),
- recordId.toString(),
- recordBson.getField("_id").toString());
- curRecordResults.errors.push_back(msg);
- curRecordResults.valid = false;
- if (crashOnMultikeyValidateFailure.shouldFail()) {
- invariant(false, msg);
- }
- }
- }
-
- if (index->isMultikey(opCtx, coll)) {
- const MultikeyPaths& indexPaths = index->getMultikeyPaths(opCtx, coll);
- if (!MultikeyPathTracker::covers(indexPaths, *documentMultikeyPaths.get())) {
- if (_validateState->fixErrors()) {
- writeConflictRetry(opCtx, "increaseMultikeyPathCoverage", coll->ns().ns(), [&] {
- WriteUnitOfWork wuow(opCtx);
- coll->getIndexCatalog()->setMultikeyPaths(
- opCtx, coll, descriptor, *multikeyMetadataKeys, *documentMultikeyPaths);
- wuow.commit();
- });
-
- LOGV2(4614701,
- "Multikey paths updated to cover multikey document",
- "indexName"_attr = descriptor->indexName(),
- "collection"_attr = coll->ns().ns());
- results->warnings.push_back(str::stream() << "Index " << descriptor->indexName()
- << " multikey paths updated.");
- results->repaired = true;
- } else {
- std::string msg = fmt::format(
- "Index {} multikey paths do not cover a document with RecordId({}) and {}",
- descriptor->indexName(),
- recordId.toString(),
- recordBson.getField("_id").toString());
- auto& curRecordResults = (results->indexResultsMap)[descriptor->indexName()];
- curRecordResults.errors.push_back(msg);
- curRecordResults.valid = false;
- }
- }
- }
-
- IndexInfo& indexInfo = _indexConsistency->getIndexInfo(descriptor->indexName());
- if (shouldBeMultikey) {
- indexInfo.multikeyDocs = true;
- }
-
- // An empty set of multikey paths indicates that this index does not track path-level
- // multikey information and we should do no tracking.
- if (shouldBeMultikey && documentMultikeyPaths->size()) {
- _indexConsistency->addDocumentMultikeyPaths(&indexInfo, *documentMultikeyPaths);
- }
-
- for (const auto& keyString : *multikeyMetadataKeys) {
- try {
- _indexConsistency->addMultikeyMetadataPath(keyString, &indexInfo);
- } catch (...) {
- return exceptionToStatus();
- }
- }
- for (const auto& keyString : *documentKeySet) {
- try {
- _totalIndexKeys++;
- _indexConsistency->addDocKey(opCtx, keyString, &indexInfo, recordId);
- } catch (...) {
- return exceptionToStatus();
- }
- }
+ this->traverseRecord(opCtx, coll, index.get(), recordId, recordBson, results);
}
return Status::OK();
}
-namespace {
-// Ensures that index entries are in increasing or decreasing order.
-void _validateKeyOrder(OperationContext* opCtx,
- const IndexCatalogEntry* index,
- const KeyString::Value& currKey,
- const KeyString::Value& prevKey,
- IndexValidateResults* results) {
- auto descriptor = index->descriptor();
- bool unique = descriptor->unique();
-
- // KeyStrings will be in strictly increasing order because all keys are sorted and they are in
- // the format (Key, RID), and all RecordIDs are unique.
- if (currKey.compare(prevKey) <= 0) {
- if (results && results->valid) {
- results->errors.push_back(str::stream()
- << "index '" << descriptor->indexName()
- << "' is not in strictly ascending or descending order");
- }
- if (results) {
- results->valid = false;
- }
- return;
- }
-
- if (unique) {
- // Unique indexes must not have duplicate keys.
- int cmp = currKey.compareWithoutRecordIdLong(prevKey);
- if (cmp != 0) {
- return;
- }
-
- if (results && results->valid) {
- auto bsonKey = KeyString::toBson(currKey, Ordering::make(descriptor->keyPattern()));
- auto firstRecordId =
- KeyString::decodeRecordIdLongAtEnd(prevKey.getBuffer(), prevKey.getSize());
- auto secondRecordId =
- KeyString::decodeRecordIdLongAtEnd(currKey.getBuffer(), currKey.getSize());
- results->errors.push_back(str::stream() << "Unique index '" << descriptor->indexName()
- << "' has duplicate key: " << bsonKey
- << ", first record: " << firstRecordId
- << ", second record: " << secondRecordId);
- }
- if (results) {
- results->valid = false;
- }
- }
-}
-} // namespace
-
-void ValidateAdaptor::traverseIndex(OperationContext* opCtx,
- const IndexCatalogEntry* index,
- int64_t* numTraversedKeys,
- ValidateResults* results) {
- const IndexDescriptor* descriptor = index->descriptor();
- auto indexName = descriptor->indexName();
- auto& indexResults = results->indexResultsMap[indexName];
- IndexInfo& indexInfo = _indexConsistency->getIndexInfo(indexName);
- int64_t numKeys = 0;
-
- bool isFirstEntry = true;
-
- // The progress meter will be inactive after traversing the record store to allow the message
- // and the total to be set to different values.
- if (!_progress.get(WithLock::withoutLock())->isActive()) {
- const char* curopMessage = "Validate: scanning index entries";
- stdx::unique_lock<Client> lk(*opCtx->getClient());
- _progress.set(
- lk, CurOp::get(opCtx)->setProgress_inlock(curopMessage, _totalIndexKeys), opCtx);
- }
-
- const KeyString::Version version =
- index->accessMethod()->asSortedData()->getSortedDataInterface()->getKeyStringVersion();
-
- KeyString::Builder firstKeyStringBuilder(
- version, BSONObj(), indexInfo.ord, KeyString::Discriminator::kExclusiveBefore);
- KeyString::Value firstKeyString = firstKeyStringBuilder.getValueCopy();
- KeyString::Value prevIndexKeyStringValue;
-
- // Ensure that this index has an open index cursor.
- const auto indexCursorIt = _validateState->getIndexCursors().find(indexName);
- invariant(indexCursorIt != _validateState->getIndexCursors().end());
-
- const std::unique_ptr<SortedDataInterfaceThrottleCursor>& indexCursor = indexCursorIt->second;
-
- boost::optional<KeyStringEntry> indexEntry;
- try {
- indexEntry = indexCursor->seekForKeyString(opCtx, firstKeyString);
- } catch (const DBException& ex) {
- if (TestingProctor::instance().isEnabled() && ex.code() != ErrorCodes::WriteConflict) {
- LOGV2_FATAL(5318400,
- "Error seeking to first key",
- "error"_attr = ex.toString(),
- "index"_attr = indexName,
- "key"_attr = firstKeyString.toString());
- }
- throw;
- }
-
- const auto keyFormat =
- index->accessMethod()->asSortedData()->getSortedDataInterface()->rsKeyFormat();
- const RecordId kWildcardMultikeyMetadataRecordId = record_id_helpers::reservedIdFor(
- record_id_helpers::ReservationId::kWildcardMultikeyMetadataId, keyFormat);
-
- // Warn about unique indexes with keys in old format (without record id).
- bool foundOldUniqueIndexKeys = false;
-
- while (indexEntry) {
- if (!isFirstEntry) {
- _validateKeyOrder(
- opCtx, index, indexEntry->keyString, prevIndexKeyStringValue, &indexResults);
- }
-
- if (!foundOldUniqueIndexKeys && !descriptor->isIdIndex() && descriptor->unique() &&
- !indexCursor->isRecordIdAtEndOfKeyString()) {
- results->warnings.push_back(
- fmt::format("Unique index {} has one or more keys in the old format (without "
- "embedded record id). First record: {}",
- indexInfo.indexName,
- indexEntry->loc.toString()));
- foundOldUniqueIndexKeys = true;
- }
-
- bool isMetadataKey = indexEntry->loc == kWildcardMultikeyMetadataRecordId;
- if (descriptor->getIndexType() == IndexType::INDEX_WILDCARD && isMetadataKey) {
- _indexConsistency->removeMultikeyMetadataPath(indexEntry->keyString, &indexInfo);
- } else {
- try {
- _indexConsistency->addIndexKey(
- opCtx, indexEntry->keyString, &indexInfo, indexEntry->loc, results);
- } catch (const DBException& e) {
- StringBuilder ss;
- ss << "Parsing index key for " << indexInfo.indexName << " recId "
- << indexEntry->loc << " threw exception " << e.toString();
- results->errors.push_back(ss.str());
- results->valid = false;
- }
- }
- {
- stdx::unique_lock<Client> lk(*opCtx->getClient());
- _progress.get(lk)->hit();
- }
- numKeys++;
- isFirstEntry = false;
- prevIndexKeyStringValue = indexEntry->keyString;
-
- if (numKeys % kInterruptIntervalNumRecords == 0) {
- // Periodically checks for interrupts and yields.
- opCtx->checkForInterrupt();
- _validateState->yield(opCtx);
- }
-
- try {
- indexEntry = indexCursor->nextKeyString(opCtx);
- } catch (const DBException& ex) {
- if (TestingProctor::instance().isEnabled() && ex.code() != ErrorCodes::WriteConflict) {
- LOGV2_FATAL(5318401,
- "Error advancing index cursor",
- "error"_attr = ex.toString(),
- "index"_attr = indexName,
- "prevKey"_attr = prevIndexKeyStringValue.toString());
- }
- throw;
- }
- }
-
- if (results && _indexConsistency->getMultikeyMetadataPathCount(&indexInfo) > 0) {
- results->errors.push_back(str::stream()
- << "Index '" << descriptor->indexName()
- << "' has one or more missing multikey metadata index keys");
- results->valid = false;
- }
-
- // Adjust multikey metadata when allowed. These states are all allowed by the design of
- // multikey. A collection should still be valid without these adjustments.
- if (_validateState->adjustMultikey()) {
-
- // If this collection has documents that make this index multikey, then check whether those
- // multikey paths match the index's metadata.
- auto indexPaths = index->getMultikeyPaths(opCtx, _validateState->getCollection());
- auto& documentPaths = indexInfo.docMultikeyPaths;
- if (indexInfo.multikeyDocs && documentPaths != indexPaths) {
- LOGV2(5367500,
- "Index's multikey paths do not match those of its documents",
- "index"_attr = descriptor->indexName(),
- "indexPaths"_attr = MultikeyPathTracker::dumpMultikeyPaths(indexPaths),
- "documentPaths"_attr = MultikeyPathTracker::dumpMultikeyPaths(documentPaths));
-
- // Since we have the correct multikey path information for this index, we can tighten up
- // its metadata to improve query performance. This may apply in two distinct scenarios:
- // 1. Collection data has changed such that the current multikey paths on the index
- // are too permissive and certain document paths are no longer multikey.
- // 2. This index was built before 3.4, and there is no multikey path information for
- // the index. We can effectively 'upgrade' the index so that it does not need to be
- // rebuilt to update this information.
- writeConflictRetry(opCtx, "updateMultikeyPaths", _validateState->nss().ns(), [&]() {
- WriteUnitOfWork wuow(opCtx);
- auto writeableIndex = const_cast<IndexCatalogEntry*>(index);
- const bool isMultikey = true;
- writeableIndex->forceSetMultikey(
- opCtx, _validateState->getCollection(), isMultikey, documentPaths);
- wuow.commit();
- });
-
- if (results) {
- results->warnings.push_back(str::stream() << "Updated index multikey metadata"
- << ": " << descriptor->indexName());
- results->repaired = true;
- }
- }
-
- // If this index does not need to be multikey, then unset the flag.
- if (index->isMultikey(opCtx, _validateState->getCollection()) && !indexInfo.multikeyDocs) {
- invariant(!indexInfo.docMultikeyPaths.size());
-
- LOGV2(5367501,
- "Index is multikey but there are no multikey documents",
- "index"_attr = descriptor->indexName());
-
- // This makes an improvement in the case that no documents make the index multikey and
- // the flag can be unset entirely. This may be due to a change in the data or historical
-        // multikey bugs that have persisted incorrect multikey information.
- writeConflictRetry(opCtx, "unsetMultikeyPaths", _validateState->nss().ns(), [&]() {
- WriteUnitOfWork wuow(opCtx);
- auto writeableIndex = const_cast<IndexCatalogEntry*>(index);
- const bool isMultikey = false;
- writeableIndex->forceSetMultikey(
- opCtx, _validateState->getCollection(), isMultikey, {});
- wuow.commit();
- });
-
- if (results) {
- results->warnings.push_back(str::stream() << "Unset index multikey metadata"
- << ": " << descriptor->indexName());
- results->repaired = true;
- }
- }
- }
-
- if (numTraversedKeys) {
- *numTraversedKeys = numKeys;
- }
-}
-
void ValidateAdaptor::traverseRecordStore(OperationContext* opCtx,
ValidateResults* results,
BSONObjBuilder* output) {
@@ -1093,7 +738,7 @@ void ValidateAdaptor::traverseRecordStore(OperationContext* opCtx,
prevRecordId = record->id;
- if (_numRecords % kInterruptIntervalNumRecords == 0 ||
+ if (_numRecords % IndexConsistency::kInterruptIntervalNumRecords == 0 ||
interruptIntervalNumBytes >= kInterruptIntervalNumBytes) {
// Periodically checks for interrupts and yields.
opCtx->checkForInterrupt();
@@ -1125,78 +770,90 @@ void ValidateAdaptor::traverseRecordStore(OperationContext* opCtx,
}
}
+bool isColumnStoreIndex(const IndexCatalogEntry* index) {
+ return index->descriptor()->getAccessMethodName() == IndexNames::COLUMN;
+}
+
void ValidateAdaptor::validateIndexKeyCount(OperationContext* opCtx,
const IndexCatalogEntry* index,
IndexValidateResults& results) {
- // Fetch the total number of index entries we previously found traversing the index.
- const IndexDescriptor* desc = index->descriptor();
- const std::string indexName = desc->indexName();
- IndexInfo* indexInfo = &_indexConsistency->getIndexInfo(indexName);
- auto numTotalKeys = indexInfo->numKeys;
-
- // Update numRecords by subtracting number of records removed from record store in repair mode
- // when validating index consistency
- _numRecords -= results.keysRemovedFromRecordStore;
-
- // Do not fail on finding too few index entries compared to collection entries when full:false.
- bool hasTooFewKeys = false;
- bool noErrorOnTooFewKeys = !_validateState->isFullIndexValidation();
-
- if (desc->isIdIndex() && numTotalKeys != _numRecords) {
- hasTooFewKeys = (numTotalKeys < _numRecords);
- std::string msg = str::stream()
- << "number of _id index entries (" << numTotalKeys
- << ") does not match the number of documents in the index (" << _numRecords << ")";
- if (noErrorOnTooFewKeys && (numTotalKeys < _numRecords)) {
- results.warnings.push_back(msg);
- } else {
- results.errors.push_back(msg);
- results.valid = false;
- }
+ if (isColumnStoreIndex(index)) {
+ _columnIndexConsistency.validateIndexKeyCount(opCtx, index, &_numRecords, results);
+ } else {
+ _keyBasedIndexConsistency.validateIndexKeyCount(opCtx, index, &_numRecords, results);
}
+}
- // Hashed indexes may never be multikey.
- if (desc->getAccessMethodName() == IndexNames::HASHED &&
- index->isMultikey(opCtx, _validateState->getCollection())) {
- results.errors.push_back(str::stream() << "Hashed index is incorrectly marked multikey: "
- << desc->indexName());
- results.valid = false;
+void ValidateAdaptor::traverseIndex(OperationContext* opCtx,
+ const IndexCatalogEntry* index,
+ int64_t* numTraversedKeys,
+ ValidateResults* results) {
+ // The progress meter will be inactive after traversing the record store to allow the message
+ // and the total to be set to different values.
+ if (!_progress.get(WithLock::withoutLock())->isActive()) {
+ const char* curopMessage = "Validate: scanning index entries";
+ stdx::unique_lock<Client> lk(*opCtx->getClient());
+ _progress.set(lk,
+ CurOp::get(opCtx)->setProgress_inlock(
+ curopMessage,
+ isColumnStoreIndex(index)
+ ? _columnIndexConsistency.getTotalIndexKeys()
+ : _keyBasedIndexConsistency.getTotalIndexKeys()),
+ opCtx);
}
- // Confirm that the number of index entries is not greater than the number of documents in the
- // collection. This check is only valid for indexes that are not multikey (indexed arrays
- // produce an index key per array entry) and not $** indexes which can produce index keys for
- // multiple paths within a single document.
- if (results.valid && !index->isMultikey(opCtx, _validateState->getCollection()) &&
- desc->getIndexType() != IndexType::INDEX_WILDCARD && numTotalKeys > _numRecords) {
- std::string err = str::stream()
- << "index " << desc->indexName() << " is not multi-key, but has more entries ("
- << numTotalKeys << ") than documents in the index (" << _numRecords << ")";
- results.errors.push_back(err);
- results.valid = false;
+ int64_t numKeys = 0;
+ if (isColumnStoreIndex(index)) {
+ numKeys += _columnIndexConsistency.traverseIndex(opCtx, index, _progress, results);
+ } else {
+ numKeys += _keyBasedIndexConsistency.traverseIndex(opCtx, index, _progress, results);
}
- // Ignore any indexes with a special access method. If an access method name is given, the
- // index may be a full text, geo or special index plugin with different semantics.
- if (results.valid && !desc->isSparse() && !desc->isPartial() && !desc->isIdIndex() &&
- desc->getAccessMethodName() == "" && numTotalKeys < _numRecords) {
- hasTooFewKeys = true;
- std::string msg = str::stream()
- << "index " << desc->indexName() << " is not sparse or partial, but has fewer entries ("
- << numTotalKeys << ") than documents in the index (" << _numRecords << ")";
- if (noErrorOnTooFewKeys) {
- results.warnings.push_back(msg);
- } else {
- results.errors.push_back(msg);
- results.valid = false;
- }
+ if (numTraversedKeys) {
+ *numTraversedKeys = numKeys;
}
+}
- if (!_validateState->isFullIndexValidation() && hasTooFewKeys) {
- std::string warning = str::stream()
- << "index " << desc->indexName() << " has fewer keys than records."
- << " Please re-run the validate command with {full: true}";
- results.warnings.push_back(warning);
+void ValidateAdaptor::traverseRecord(OperationContext* opCtx,
+ const CollectionPtr& coll,
+ const IndexCatalogEntry* index,
+ const RecordId& recordId,
+ const BSONObj& record,
+ ValidateResults* results) {
+ if (isColumnStoreIndex(index)) {
+ _columnIndexConsistency.traverseRecord(opCtx, coll, index, recordId, record, results);
+ } else {
+ _keyBasedIndexConsistency.traverseRecord(opCtx, coll, index, recordId, record, results);
}
}
+
+void ValidateAdaptor::setSecondPhase() {
+ _columnIndexConsistency.setSecondPhase();
+ _keyBasedIndexConsistency.setSecondPhase();
+}
+
+bool ValidateAdaptor::limitMemoryUsageForSecondPhase(ValidateResults* result) {
+ bool retVal = true;
+ retVal &= _columnIndexConsistency.limitMemoryUsageForSecondPhase(result);
+ retVal &= _keyBasedIndexConsistency.limitMemoryUsageForSecondPhase(result);
+ return retVal;
+}
+
+bool ValidateAdaptor::haveEntryMismatch() const {
+ bool retVal = false;
+ retVal |= _columnIndexConsistency.haveEntryMismatch();
+ retVal |= _keyBasedIndexConsistency.haveEntryMismatch();
+ return retVal;
+}
+
+void ValidateAdaptor::repairIndexEntries(OperationContext* opCtx, ValidateResults* results) {
+ _columnIndexConsistency.repairIndexEntries(opCtx, results);
+ _keyBasedIndexConsistency.repairIndexEntries(opCtx, results);
+}
+
+void ValidateAdaptor::addIndexEntryErrors(OperationContext* opCtx, ValidateResults* results) {
+ _columnIndexConsistency.addIndexEntryErrors(opCtx, results);
+ _keyBasedIndexConsistency.addIndexEntryErrors(opCtx, results);
+}
+
} // namespace mongo
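
The net effect of the validate_adaptor.cpp changes above is that ValidateAdaptor no longer walks index keys itself; it routes every per-index operation to one of two consistency checkers and combines their answers. A minimal standalone sketch of that dispatch shape, assuming illustrative names throughout (none of the classes below are the real MongoDB types):

```cpp
#include <iostream>
#include <string>

// Two checkers with a common interface, standing in for the key-string and
// column-store consistency objects.
class Checker {
public:
    virtual ~Checker() = default;
    virtual void traverseRecord(const std::string& indexName) = 0;
    virtual bool haveEntryMismatch() const = 0;
};

class KeyStringChecker : public Checker {
public:
    void traverseRecord(const std::string& indexName) override {
        std::cout << "keystring pass over " << indexName << '\n';
    }
    bool haveEntryMismatch() const override { return false; }
};

class ColumnChecker : public Checker {
public:
    void traverseRecord(const std::string& indexName) override {
        std::cout << "column-store pass over " << indexName << '\n';
    }
    bool haveEntryMismatch() const override { return false; }
};

// The adaptor owns one checker of each kind, routes per-index work by access
// method (as isColumnStoreIndex() does above), and answers aggregate queries
// by consulting both.
class Adaptor {
public:
    void traverseRecord(const std::string& indexName, const std::string& accessMethod) {
        (accessMethod == "columnstore" ? static_cast<Checker&>(_column)
                                       : static_cast<Checker&>(_keyString))
            .traverseRecord(indexName);
    }
    bool haveEntryMismatch() const {
        return _column.haveEntryMismatch() || _keyString.haveEntryMismatch();
    }

private:
    KeyStringChecker _keyString;
    ColumnChecker _column;
};

int main() {
    Adaptor adaptor;
    adaptor.traverseRecord("a_1", "btree");
    adaptor.traverseRecord("$**_columnstore", "columnstore");
    std::cout << std::boolalpha << adaptor.haveEntryMismatch() << '\n';
    return 0;
}
```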
diff --git a/src/mongo/db/catalog/validate_adaptor.h b/src/mongo/db/catalog/validate_adaptor.h
index 3217c7b5824..07d05164623 100644
--- a/src/mongo/db/catalog/validate_adaptor.h
+++ b/src/mongo/db/catalog/validate_adaptor.h
@@ -29,12 +29,15 @@
#pragma once
+#include <array>
+
+#include "mongo/db/catalog/column_index_consistency.h"
+#include "mongo/db/catalog/index_consistency.h"
#include "mongo/db/catalog/validate_state.h"
#include "mongo/util/progress_meter.h"
namespace mongo {
-class IndexConsistency;
class IndexDescriptor;
class OperationContext;
@@ -44,10 +47,11 @@ class OperationContext;
*/
class ValidateAdaptor {
public:
- ValidateAdaptor(IndexConsistency* indexConsistency,
- CollectionValidation::ValidateState* validateState)
+ ValidateAdaptor(OperationContext* opCtx, CollectionValidation::ValidateState* validateState)
- : _indexConsistency(indexConsistency), _validateState(validateState) {}
+ : _keyBasedIndexConsistency(opCtx, validateState),
+ _columnIndexConsistency(opCtx, validateState),
+ _validateState(validateState) {}
/**
* Validates the record data and traverses through its key set to keep track of the
@@ -59,6 +63,13 @@ public:
long long* nNonCompliantDocuments,
size_t* dataSize,
ValidateResults* results);
+ /**
+ * Traverses the record store to retrieve every record and go through its document key
+ * set to keep track of the index consistency during a validation.
+ */
+ void traverseRecordStore(OperationContext* opCtx,
+ ValidateResults* results,
+ BSONObjBuilder* output);
/**
* Traverses the index getting index entries to validate them and keep track of the index keys
@@ -70,12 +81,14 @@ public:
ValidateResults* results);
/**
- * Traverses the record store to retrieve every record and go through its document key
- * set to keep track of the index consistency during a validation.
+     * Passes a single record through the underlying index consistency objects for traversal.
*/
- void traverseRecordStore(OperationContext* opCtx,
- ValidateResults* results,
- BSONObjBuilder* output);
+ void traverseRecord(OperationContext* opCtx,
+ const CollectionPtr& coll,
+ const IndexCatalogEntry* index,
+ const RecordId& recordId,
+ const BSONObj& record,
+ ValidateResults* results);
/**
* Validates that the number of document keys matches the number of index keys previously
@@ -85,8 +98,38 @@ public:
const IndexCatalogEntry* index,
IndexValidateResults& results);
+ /**
+ * Informs the index consistency objects that we're advancing to the second phase of index
+ * validation.
+ */
+ void setSecondPhase();
+
+ /**
+ * Sets up the index consistency objects to limit memory usage in the second phase of index
+ * validation. Returns whether the memory limit is sufficient to report at least one index entry
+ * inconsistency and continue with the second phase of validation.
+ */
+ bool limitMemoryUsageForSecondPhase(ValidateResults* result);
+
+ /**
+ * Returns true if the underlying index consistency objects have entry mismatches.
+ */
+ bool haveEntryMismatch() const;
+
+ /**
+     * If repair mode is enabled, tries to insert any _missingIndexEntries into the indexes.
+ */
+ void repairIndexEntries(OperationContext* opCtx, ValidateResults* results);
+
+ /**
+     * Records the errors gathered from the second phase of index validation into the provided
+     * ValidateResults (including its per-index results map).
+ */
+ void addIndexEntryErrors(OperationContext* opCtx, ValidateResults* results);
+
private:
- IndexConsistency* _indexConsistency;
+ KeyStringIndexConsistency _keyBasedIndexConsistency;
+ ColumnIndexConsistency _columnIndexConsistency;
CollectionValidation::ValidateState* _validateState;
// Saves the record count from the record store traversal to be used later to validate the index
@@ -95,9 +138,5 @@ private:
// For reporting progress during record store and index traversal.
ProgressMeterHolder _progress;
-
- // The total number of index keys is stored during the first validation phase, since this
- // count may change during a second phase.
- uint64_t _totalIndexKeys = 0;
};
} // namespace mongo
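
The header now exposes the full two-phase surface in one place. As a hedged sketch of how a validation driver might sequence these calls (the real ordering lives in collection_validation.cpp and is an assumption here, inferred from the method comments above):

```cpp
#include <iostream>

// Stand-in for ValidateAdaptor with the phase-related API declared above.
struct Adaptor {
    void traverseRecordStore() { std::cout << "scan records\n"; }
    void traverseIndexes() { std::cout << "scan index entries\n"; }
    bool haveEntryMismatch() const { return true; }
    void setSecondPhase() { std::cout << "enter phase two\n"; }
    bool limitMemoryUsageForSecondPhase() { return true; }
    void repairIndexEntries() { std::cout << "repair\n"; }
    void addIndexEntryErrors() { std::cout << "report\n"; }
};

int main() {
    Adaptor adaptor;
    // Phase one: count keys seen from documents and from index entries.
    adaptor.traverseRecordStore();
    adaptor.traverseIndexes();
    if (adaptor.haveEntryMismatch()) {
        // Phase two re-scans to pin down the exact mismatching entries, but
        // only if that can be done within the memory budget.
        adaptor.setSecondPhase();
        if (adaptor.limitMemoryUsageForSecondPhase()) {
            adaptor.traverseRecordStore();
            adaptor.traverseIndexes();
            adaptor.repairIndexEntries();
            adaptor.addIndexEntryErrors();
        }
    }
    return 0;
}
```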
diff --git a/src/mongo/db/catalog/validate_state.cpp b/src/mongo/db/catalog/validate_state.cpp
index 9f0fd873f59..304f664005b 100644
--- a/src/mongo/db/catalog/validate_state.cpp
+++ b/src/mongo/db/catalog/validate_state.cpp
@@ -38,6 +38,7 @@
#include "mongo/db/catalog/index_consistency.h"
#include "mongo/db/catalog/validate_adaptor.h"
#include "mongo/db/db_raii.h"
+#include "mongo/db/index/columns_access_method.h"
#include "mongo/db/index/index_access_method.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/storage/durable_catalog.h"
@@ -192,6 +193,9 @@ void ValidateState::_yieldCursors(OperationContext* opCtx) {
for (const auto& indexCursor : _indexCursors) {
indexCursor.second->save();
}
+ for (const auto& indexCursor : _columnStoreIndexCursors) {
+ indexCursor.second->save();
+ }
_traverseRecordStoreCursor->save();
_seekRecordStoreCursor->save();
@@ -200,6 +204,9 @@ void ValidateState::_yieldCursors(OperationContext* opCtx) {
for (const auto& indexCursor : _indexCursors) {
indexCursor.second->restore();
}
+ for (const auto& indexCursor : _columnStoreIndexCursors) {
+ indexCursor.second->restore();
+ }
uassert(ErrorCodes::Interrupted,
"Interrupted due to: failure to restore yielded traverse cursor",
@@ -209,9 +216,16 @@ void ValidateState::_yieldCursors(OperationContext* opCtx) {
_seekRecordStoreCursor->restore());
}
+bool ValidateState::_isIndexDataCheckpointed(OperationContext* opCtx,
+ const IndexCatalogEntry* entry) {
+ return isBackground() &&
+ opCtx->getServiceContext()->getStorageEngine()->isInIndividuallyCheckpointedIndexes(
+ entry->getIdent());
+}
+
void ValidateState::initializeCursors(OperationContext* opCtx) {
invariant(!_traverseRecordStoreCursor && !_seekRecordStoreCursor && _indexCursors.size() == 0 &&
- _indexes.size() == 0);
+ _columnStoreIndexCursors.size() == 0 && _indexes.size() == 0);
// Background validation reads from the last stable checkpoint instead of the latest data. This
// allows concurrent writes to go ahead without interfering with validation's view of the data.
@@ -292,27 +306,51 @@ void ValidateState::initializeCursors(OperationContext* opCtx) {
continue;
}
- auto iam = entry->accessMethod()->asSortedData();
- if (!iam)
- continue;
-
- _indexCursors.emplace(
- desc->indexName(),
- std::make_unique<SortedDataInterfaceThrottleCursor>(opCtx, iam, &_dataThrottle));
+ if (entry->descriptor()->getAccessMethodName() == IndexNames::COLUMN) {
+ const auto iam = entry->accessMethod();
+ _columnStoreIndexCursors.emplace(
+ desc->indexName(),
+ static_cast<ColumnStoreAccessMethod*>(iam)->storage()->newCursor(opCtx));
+
+ // Skip any newly created indexes that, because they were built with a WT bulk loader,
+            // are checkpointed but not yet consistent with the rest of the checkpoint's PIT view of
+ // the data.
+ if (_isIndexDataCheckpointed(opCtx, entry)) {
+ _columnStoreIndexCursors.erase(desc->indexName());
+ LOGV2(
+ 7106110,
+ "Skipping background validation on the index because the index data is not yet "
+ "consistent in the checkpoint.",
+ "desc_indexName"_attr = desc->indexName(),
+ "nss"_attr = _nss);
+ continue;
+ }
+ } else {
+ const auto iam = entry->accessMethod()->asSortedData();
+ if (!iam) {
+ LOGV2(6325100,
+ "[Debugging] skipping index {index_name} because it isn't SortedData",
+ "index_name"_attr = desc->indexName());
+ continue;
+ }
- // Skip any newly created indexes that, because they were built with a WT bulk loader,
- // are checkpoint'ed but not yet consistent with the rest of checkpoint's PIT view of
- // the data.
- if (isBackground() &&
- opCtx->getServiceContext()->getStorageEngine()->isInIndividuallyCheckpointedIndexes(
- diskIndexIdent)) {
- _indexCursors.erase(desc->indexName());
- LOGV2(6868903,
- "Skipping background validation on the index because the index data is not yet "
- "consistent in the checkpoint.",
- "desc_indexName"_attr = desc->indexName(),
- "nss"_attr = _nss);
- continue;
+ _indexCursors.emplace(
+ desc->indexName(),
+ std::make_unique<SortedDataInterfaceThrottleCursor>(opCtx, iam, &_dataThrottle));
+
+ // Skip any newly created indexes that, because they were built with a WT bulk loader,
+            // are checkpointed but not yet consistent with the rest of the checkpoint's PIT view of
+ // the data.
+ if (_isIndexDataCheckpointed(opCtx, entry)) {
+ _indexCursors.erase(desc->indexName());
+ LOGV2(
+ 6868903,
+ "Skipping background validation on the index because the index data is not yet "
+ "consistent in the checkpoint.",
+ "desc_indexName"_attr = desc->indexName(),
+ "nss"_attr = _nss);
+ continue;
+ }
}
_indexes.push_back(indexCatalog->getEntryShared(desc));
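
Both branches of initializeCursors() follow the same open-then-maybe-discard shape: open a cursor, then drop it again if the index table was bulk-loaded after the checkpoint that background validation reads from. A self-contained sketch of that pattern, with plain containers standing in for the storage engine and cursor types:

```cpp
#include <iostream>
#include <map>
#include <memory>
#include <set>
#include <string>

// Models StorageEngine::isInIndividuallyCheckpointedIndexes().
bool isIndividuallyCheckpointed(const std::set<std::string>& checkpointed,
                                const std::string& ident) {
    return checkpointed.count(ident) > 0;
}

int main() {
    const bool background = true;
    const std::set<std::string> individuallyCheckpointed{"index-2-ident"};
    std::map<std::string, std::unique_ptr<int>> cursors;  // name -> fake cursor

    const std::map<std::string, std::string> indexes{{"a_1", "index-1-ident"},
                                                     {"b_1", "index-2-ident"}};
    for (const auto& [name, ident] : indexes) {
        cursors.emplace(name, std::make_unique<int>(0));  // open a cursor first
        // Mirrors _isIndexDataCheckpointed(): an index whose table was
        // bulk-loaded after the checkpoint is not yet consistent with the
        // checkpoint's view, so drop its cursor and skip it.
        if (background && isIndividuallyCheckpointed(individuallyCheckpointed, ident)) {
            cursors.erase(name);
            std::cout << "skipping " << name << '\n';
            continue;
        }
        std::cout << "validating " << name << '\n';
    }
    return 0;
}
```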
diff --git a/src/mongo/db/catalog/validate_state.h b/src/mongo/db/catalog/validate_state.h
index c508a6b5850..a5902ca52ff 100644
--- a/src/mongo/db/catalog/validate_state.h
+++ b/src/mongo/db/catalog/validate_state.h
@@ -148,8 +148,7 @@ public:
/**
* Map of index names to index cursors.
*/
- const std::map<std::string, std::unique_ptr<SortedDataInterfaceThrottleCursor>>&
- getIndexCursors() const {
+ const StringMap<std::unique_ptr<SortedDataInterfaceThrottleCursor>>& getIndexCursors() const {
return _indexCursors;
}
@@ -161,6 +160,10 @@ public:
return _seekRecordStoreCursor;
}
+ const StringMap<std::unique_ptr<ColumnStore::Cursor>>& getColumnStoreCursors() const {
+ return _columnStoreIndexCursors;
+ }
+
RecordId getFirstRecordId() const {
return _firstRecordId;
}
@@ -227,6 +230,8 @@ private:
*/
void _yieldCursors(OperationContext* opCtx);
+ bool _isIndexDataCheckpointed(OperationContext* opCtx, const IndexCatalogEntry* entry);
+
NamespaceString _nss;
ValidateMode _mode;
RepairMode _repairMode;
@@ -253,9 +258,10 @@ private:
std::vector<std::shared_ptr<const IndexCatalogEntry>> _indexes;
// Shared cursors to be used during validation, created in 'initializeCursors()'.
- std::map<std::string, std::unique_ptr<SortedDataInterfaceThrottleCursor>> _indexCursors;
+ StringMap<std::unique_ptr<SortedDataInterfaceThrottleCursor>> _indexCursors;
std::unique_ptr<SeekableRecordThrottleCursor> _traverseRecordStoreCursor;
std::unique_ptr<SeekableRecordThrottleCursor> _seekRecordStoreCursor;
+ StringMap<std::unique_ptr<ColumnStore::Cursor>> _columnStoreIndexCursors;
RecordId _firstRecordId;
diff --git a/src/mongo/db/index/columns_access_method.cpp b/src/mongo/db/index/columns_access_method.cpp
index 3aa913a0f19..96af1ddfd50 100644
--- a/src/mongo/db/index/columns_access_method.cpp
+++ b/src/mongo/db/index/columns_access_method.cpp
@@ -265,7 +265,6 @@ Status ColumnStoreAccessMethod::BulkBuilder::keyCommitted(
return Status::OK();
}
-
void ColumnStoreAccessMethod::_visitCellsForIndexInsert(
OperationContext* opCtx,
PooledFragmentBuilder& buf,
@@ -375,6 +374,9 @@ Status ColumnStoreAccessMethod::update(OperationContext* opCtx,
[&](column_keygen::ColumnKeyGenerator::DiffAction diffAction,
StringData path,
const column_keygen::UnencodedCellView* cell) {
+            // TODO SERVER-72351: Temporary fix; remove once the underlying issue is resolved.
+ ON_BLOCK_EXIT([&]() { columnKeys->clear(); });
+
if (diffAction == column_keygen::ColumnKeyGenerator::DiffAction::kDelete) {
columnKeys->emplace_back(std::make_tuple(path.toString(), "", rid));
int64_t removed = 0;
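
The ON_BLOCK_EXIT in the update() hunk is a scope guard: the lambda it wraps runs when the enclosing scope exits, on every path, normal or exceptional. A runnable sketch of the idiom with a hand-rolled guard (the real helper is ON_BLOCK_EXIT from mongo/util/scopeguard.h; ScopeGuard and processDiff below are illustrative):

```cpp
#include <iostream>
#include <utility>
#include <vector>

// Minimal stand-in for ON_BLOCK_EXIT: runs its callable on scope exit.
template <typename F>
class ScopeGuard {
public:
    explicit ScopeGuard(F fn) : _fn(std::move(fn)) {}
    ~ScopeGuard() { _fn(); }

private:
    F _fn;
};

void processDiff(std::vector<int>* columnKeys) {
    // As in the hunk above: guarantee the accumulated keys are cleared on
    // every invocation, no matter which branch returns.
    ScopeGuard guard([&] { columnKeys->clear(); });
    columnKeys->push_back(42);
    std::cout << "keys while in scope: " << columnKeys->size() << '\n';
}  // guard's destructor runs here and clears columnKeys

int main() {
    std::vector<int> keys;
    processDiff(&keys);
    std::cout << "keys after scope exit: " << keys.size() << '\n';  // prints 0
    return 0;
}
```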
diff --git a/src/mongo/db/index/columns_access_method.h b/src/mongo/db/index/columns_access_method.h
index 8d67306fdd7..cbb195c52fa 100644
--- a/src/mongo/db/index/columns_access_method.h
+++ b/src/mongo/db/index/columns_access_method.h
@@ -48,6 +48,10 @@ public:
ColumnStoreAccessMethod(IndexCatalogEntry* ice, std::unique_ptr<ColumnStore>);
+ const column_keygen::ColumnKeyGenerator& getKeyGen() const {
+ return _keyGen;
+ }
+
/**
* Returns a pointer to the ColumnstoreProjection owned by the underlying ColumnKeyGenerator.
*/
@@ -123,8 +127,16 @@ public:
return _store.get();
}
+ ColumnStore* writableStorage() const {
+ return _store.get();
+ }
+
class BulkBuilder;
+ const std::string& indexName() const {
+ return _descriptor->indexName();
+ }
+
/**
* Returns true iff 'compressor' is a recognized name of a block compression module that is
* supported for use with the column store index.
diff --git a/src/mongo/db/storage/column_store.h b/src/mongo/db/storage/column_store.h
index 3886054a022..d22b8386dc6 100644
--- a/src/mongo/db/storage/column_store.h
+++ b/src/mongo/db/storage/column_store.h
@@ -53,6 +53,18 @@ struct FullCellView {
CellView value;
};
+/**
+ * An owned representation of a column-store index entry cell.
+ */
+struct FullCellValue {
+ PathValue path;
+ RowId rid;
+ CellValue value;
+
+ FullCellValue(FullCellView fcv) : path(fcv.path), rid(fcv.rid), value(fcv.value) {}
+ FullCellValue(PathView p, RowId r, CellView v) : path(p), rid(r), value(v) {}
+};
+
struct CellViewForPath {
RowId rid;
CellView value;
@@ -66,10 +78,8 @@ struct CellViewForPath {
* stores tuples of Path, RecordId and Value in a separate format.
*/
class ColumnStore {
-protected:
- class Cursor;
-
public:
+ class Cursor;
class WriteCursor {
public:
virtual ~WriteCursor() = default;
@@ -376,7 +386,10 @@ public:
_ident = std::move(newIdent);
}
-protected:
+ /**
+ * The Cursor class is for raw access to the index. Except for unusual use cases (e.g., index
+     * validation), you'll want to use CursorForPath instead.
+ */
class Cursor {
public:
virtual ~Cursor() = default;
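
FullCellValue gives validation an owned counterpart to FullCellView, whose path and value members only borrow from buffers that a cursor may invalidate on its next advance. A sketch of the same view-to-owned pattern using standard-library stand-ins (the *Like types below are hypothetical; CellView and PathView behave like std::string_view here):

```cpp
#include <iostream>
#include <string>
#include <string_view>

// Borrowing form: cheap, but only valid while the underlying buffer lives.
struct CellViewLike {
    std::string_view path;
    long long rid;
    std::string_view value;
};

// Owning form: deep-copies on construction, safe to keep across cursor advances.
struct CellValueLike {
    std::string path;
    long long rid;
    std::string value;

    explicit CellValueLike(const CellViewLike& v)
        : path(v.path), rid(v.rid), value(v.value) {}
};

int main() {
    std::string buffer = "a.b.c";
    CellViewLike view{buffer, 7, "cell-bytes"};
    CellValueLike owned(view);  // copy out before the buffer is reused
    buffer = "overwritten";     // invalidates view.path, but not owned.path
    std::cout << owned.path << " rid=" << owned.rid << '\n';
    return 0;
}
```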
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_column_store.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_column_store.cpp
index aaaf5c27041..263d0bcbf67 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_column_store.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_column_store.cpp
@@ -270,8 +270,11 @@ public:
if (!_eof && !_lastMoveSkippedKey) {
WT_ITEM key;
WT_CURSOR* c = _cursor->get();
- c->get_key(c, &key);
- _buffer.assign(static_cast<const char*>(key.data), key.size);
+ if (c->get_key(c, &key) == 0) {
+ _buffer.assign(static_cast<const char*>(key.data), key.size);
+ } else {
+ _buffer.clear();
+ }
}
resetCursor();
}
@@ -321,6 +324,10 @@ private:
WT_CURSOR* c = _cursor->get();
+ if (searchKey.empty()) {
+ return false;
+ }
+
const WiredTigerItem searchKeyItem(searchKey);
c->set_key(c, searchKeyItem.Get());
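
The WiredTiger changes make cursor save/restore defensive: the saved key is cached only when get_key() reports success (WT_CURSOR::get_key returns 0 on success), and an empty buffer makes restore skip repositioning. A self-contained sketch of that handshake, with a plain callable standing in for the WiredTiger call:

```cpp
#include <functional>
#include <iostream>
#include <string>

// 'getKey' models WT_CURSOR::get_key, which returns 0 on success.
void saveCursorKey(const std::function<int(std::string*)>& getKey, std::string& buffer) {
    std::string key;
    if (getKey(&key) == 0) {
        buffer = key;    // position captured; restore may seek back to it
    } else {
        buffer.clear();  // no valid key; an empty buffer tells restore to skip seeking
    }
}

bool restoreCursorKey(const std::string& buffer) {
    if (buffer.empty()) {
        return false;  // mirrors the new early return on an empty searchKey
    }
    // ... set_key / search with 'buffer' would go here ...
    return true;
}

int main() {
    std::string buffer;
    saveCursorKey([](std::string* out) { *out = "saved-key"; return 0; }, buffer);
    std::cout << std::boolalpha << restoreCursorKey(buffer) << '\n';  // true
    saveCursorKey([](std::string*) { return -1; }, buffer);           // failure path
    std::cout << std::boolalpha << restoreCursorKey(buffer) << '\n';  // false
    return 0;
}
```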
diff --git a/src/mongo/dbtests/storage_debug_util.cpp b/src/mongo/dbtests/storage_debug_util.cpp
index 9cf0b3c640a..89f5ca48582 100644
--- a/src/mongo/dbtests/storage_debug_util.cpp
+++ b/src/mongo/dbtests/storage_debug_util.cpp
@@ -72,7 +72,7 @@ void printCollectionAndIndexTableEntries(OperationContext* opCtx, const Namespac
const auto indexDescriptor = indexCatalogEntry->descriptor();
const auto iam = indexCatalogEntry->accessMethod()->asSortedData();
if (!iam) {
- LOGV2(6325100,
+ LOGV2(6325101,
"[Debugging] skipping index {index_name} because it isn't SortedData",
"index_name"_attr = indexDescriptor->indexName());
continue;