author     Mohammad Dashti <mdashti@gmail.com>  2023-01-05 10:51:14 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>  2023-01-05 11:55:01 +0000
commit     72a56c27b43595e898ccdec5d859fe72b0d17135 (patch)
tree       ce5cfe83b2957b3e79deae2ee0706047fc01453c /src/mongo
parent     6908c42231a9e8ac6611aa745cf37be31f954bd5 (diff)
download   mongo-72a56c27b43595e898ccdec5d859fe72b0d17135.tar.gz
SERVER-71061 Added column-store index validation
Co-authored-by: Colin Stolley <colin.stolley@mongodb.com>
Diffstat (limited to 'src/mongo')
-rw-r--r--  src/mongo/db/catalog/README.md | 16
-rw-r--r--  src/mongo/db/catalog/SConscript | 5
-rw-r--r--  src/mongo/db/catalog/collection_validation.cpp | 25
-rw-r--r--  src/mongo/db/catalog/collection_validation_test.cpp | 760
-rw-r--r--  src/mongo/db/catalog/column_index_consistency.cpp | 282
-rw-r--r--  src/mongo/db/catalog/column_index_consistency.h | 201
-rw-r--r--  src/mongo/db/catalog/column_index_consistency_test.cpp | 261
-rw-r--r--  src/mongo/db/catalog/database_impl.cpp | 19
-rw-r--r--  src/mongo/db/catalog/index_consistency.cpp | 511
-rw-r--r--  src/mongo/db/catalog/index_consistency.h | 221
-rw-r--r--  src/mongo/db/catalog/validate_adaptor.cpp | 497
-rw-r--r--  src/mongo/db/catalog/validate_adaptor.h | 67
-rw-r--r--  src/mongo/db/catalog/validate_state.cpp | 80
-rw-r--r--  src/mongo/db/catalog/validate_state.h | 12
-rw-r--r--  src/mongo/db/index/columns_access_method.cpp | 4
-rw-r--r--  src/mongo/db/index/columns_access_method.h | 12
-rw-r--r--  src/mongo/db/storage/column_store.h | 21
-rw-r--r--  src/mongo/db/storage/wiredtiger/wiredtiger_column_store.cpp | 11
-rw-r--r--  src/mongo/dbtests/storage_debug_util.cpp | 2
19 files changed, 2357 insertions, 650 deletions
diff --git a/src/mongo/db/catalog/README.md b/src/mongo/db/catalog/README.md
index 005345a996e..35ccea2bbb8 100644
--- a/src/mongo/db/catalog/README.md
+++ b/src/mongo/db/catalog/README.md
@@ -1399,7 +1399,7 @@ Additionally, users can specify that they'd like to perform a `full` validation.
for the collection and each index, data throttling (for background validation), and general
information about the collection.
+ [IndexConsistency](https://github.com/mongodb/mongo/blob/r4.5.0/src/mongo/db/catalog/index_consistency.h)
- keeps track of the number of keys detected in the record store and indexes. Detects when there
+ descendants keep track of the number of keys detected in the record store and indexes. Detects when there
are index inconsistencies and maintains the information about the inconsistencies for
reporting.
+ [ValidateAdaptor](https://github.com/mongodb/mongo/blob/r4.5.0/src/mongo/db/catalog/validate_adaptor.h)
@@ -1418,15 +1418,15 @@ Additionally, users can specify that they'd like to perform a `full` validation.
+ We choose a read timestamp (`ReadSource`) based on the validation mode: `kNoTimestamp`
for foreground validation and `kCheckpoint` for background validation.
* Traverses the `RecordStore` using the `ValidateAdaptor` object.
- + [Validates each record and adds the document's index key set to the IndexConsistency object](https://github.com/mongodb/mongo/blob/r4.5.0/src/mongo/db/catalog/validate_adaptor.cpp#L61-L140)
+ + [Validates each record and adds the document's index key set to the IndexConsistency objects](https://github.com/mongodb/mongo/blob/r4.5.0/src/mongo/db/catalog/validate_adaptor.cpp#L61-L140)
for consistency checks at later stages.
- + In an effort to reduce the memory footprint of validation, the `IndexConsistency` object
+ + In an effort to reduce the memory footprint of validation, the `IndexConsistency` objects
[hashes](https://github.com/mongodb/mongo/blob/r4.5.0/src/mongo/db/catalog/index_consistency.cpp#L307-L309)
- the keys passed in to one of many buckets.
- + Document keys will
+ the keys (or paths) passed in to one of many buckets.
+ + Document keys (or paths) will
[increment](https://github.com/mongodb/mongo/blob/r4.5.0/src/mongo/db/catalog/index_consistency.cpp#L204-L214)
the respective bucket.
- + Index keys will
+ + Index keys (paths) will
[decrement](https://github.com/mongodb/mongo/blob/r4.5.0/src/mongo/db/catalog/index_consistency.cpp#L239-L248)
the respective bucket.
+ Checks that the `RecordId` is in [increasing order](https://github.com/mongodb/mongo/blob/r4.5.0/src/mongo/db/catalog/validate_adaptor.cpp#L305-L308).
@@ -1434,8 +1434,8 @@ Additionally, users can specify that they'd like to perform a `full` validation.
stored in the `RecordStore` (when performing a foreground validation only).
* Traverses the index entries for each index in the collection.
+ [Validates the index key order to ensure that index entries are in increasing or decreasing order](https://github.com/mongodb/mongo/blob/r4.5.0/src/mongo/db/catalog/validate_adaptor.cpp#L144-L188).
- + Adds the index key to the `IndexConsistency` object for consistency checks at later stages.
-* After the traversals are finished, the `IndexConsistency` object is checked to detect any
+ + Adds the index key to the `IndexConsistency` objects for consistency checks at later stages.
+* After the traversals are finished, the `IndexConsistency` objects are checked to detect any
inconsistencies between the collection and indexes.
+ If a bucket has a `value of 0`, then there are no inconsistencies for the keys that hashed
there.
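
The bucket-counting scheme this README section describes can be reduced to a small standalone sketch. The following is a hedged illustration only (all names hypothetical, not the server's actual classes): document keys increment a hashed bucket, index keys decrement it, and any bucket left non-zero after both traversals signals an inconsistency.

```cpp
// Minimal sketch of the bucket-counting consistency check described above.
// The real implementation (index_consistency.cpp) also tracks bucket sizes,
// memory limits, and a second phase that re-derives the concrete errors.
#include <cstdint>
#include <functional>
#include <string>
#include <vector>

class BucketConsistency {
public:
    explicit BucketConsistency(size_t numBuckets) : _buckets(numBuckets, 0) {}

    // Called once per key generated from a document in the record store.
    void addDocKey(const std::string& key) {
        _buckets[_hash(key)]++;
    }

    // Called once per key found in the index.
    void addIndexKey(const std::string& key) {
        _buckets[_hash(key)]--;
    }

    // A non-zero bucket means some key that hashed there is missing from
    // either the index (positive count) or the record store (negative count).
    bool haveEntryMismatch() const {
        for (int64_t count : _buckets) {
            if (count != 0) {
                return true;
            }
        }
        return false;
    }

private:
    size_t _hash(const std::string& key) const {
        return std::hash<std::string>{}(key) % _buckets.size();
    }

    std::vector<int64_t> _buckets;
};
```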
diff --git a/src/mongo/db/catalog/SConscript b/src/mongo/db/catalog/SConscript
index f392123499e..38a6ed37ffe 100644
--- a/src/mongo/db/catalog/SConscript
+++ b/src/mongo/db/catalog/SConscript
@@ -375,6 +375,7 @@ env.Library(
target='catalog_impl',
source=[
'collection_impl.cpp',
+ 'column_index_consistency.cpp',
'database_holder_impl.cpp',
'database_impl.cpp',
'index_catalog_entry_impl.cpp',
@@ -389,6 +390,7 @@ env.Library(
'$BUILD_DIR/mongo/db/concurrency/lock_manager',
'$BUILD_DIR/mongo/db/curop',
'$BUILD_DIR/mongo/db/dbcommands_idl',
+ '$BUILD_DIR/mongo/db/index/column_store_index',
'$BUILD_DIR/mongo/db/index/index_access_method',
'$BUILD_DIR/mongo/db/index/index_access_method_factory',
'$BUILD_DIR/mongo/db/index/index_access_methods',
@@ -463,6 +465,7 @@ env.Library(
'$BUILD_DIR/mongo/db/concurrency/exception_util',
'$BUILD_DIR/mongo/db/curop',
'$BUILD_DIR/mongo/db/index/index_access_method',
+ '$BUILD_DIR/mongo/db/index/index_access_methods',
'$BUILD_DIR/mongo/db/multi_key_path_tracker',
'$BUILD_DIR/mongo/db/record_id_helpers',
'$BUILD_DIR/mongo/db/server_base',
@@ -663,6 +666,7 @@ if wiredtiger:
'collection_validation_test.cpp',
'collection_writer_test.cpp',
'coll_mod_test.cpp',
+ 'column_index_consistency_test.cpp',
'commit_quorum_options_test.cpp',
'create_collection_test.cpp',
'database_test.cpp',
@@ -682,6 +686,7 @@ if wiredtiger:
'$BUILD_DIR/mongo/db/catalog/collection_crud',
'$BUILD_DIR/mongo/db/commands/test_commands_enabled',
'$BUILD_DIR/mongo/db/index/index_access_method',
+ '$BUILD_DIR/mongo/db/index/index_access_methods',
'$BUILD_DIR/mongo/db/index_builds_coordinator_mongod',
'$BUILD_DIR/mongo/db/multitenancy',
'$BUILD_DIR/mongo/db/op_observer/op_observer',
diff --git a/src/mongo/db/catalog/collection_validation.cpp b/src/mongo/db/catalog/collection_validation.cpp
index 2c6661a4f5c..0f07a819980 100644
--- a/src/mongo/db/catalog/collection_validation.cpp
+++ b/src/mongo/db/catalog/collection_validation.cpp
@@ -174,11 +174,10 @@ void _validateIndexes(OperationContext* opCtx,
*/
void _gatherIndexEntryErrors(OperationContext* opCtx,
ValidateState* validateState,
- IndexConsistency* indexConsistency,
ValidateAdaptor* indexValidator,
ValidateResults* result) {
- indexConsistency->setSecondPhase();
- if (!indexConsistency->limitMemoryUsageForSecondPhase(result)) {
+ indexValidator->setSecondPhase();
+ if (!indexValidator->limitMemoryUsageForSecondPhase(result)) {
return;
}
@@ -223,12 +222,12 @@ void _gatherIndexEntryErrors(OperationContext* opCtx,
}
if (validateState->fixErrors()) {
- indexConsistency->repairMissingIndexEntries(opCtx, result);
+ indexValidator->repairIndexEntries(opCtx, result);
}
LOGV2_OPTIONS(20301, {LogComponent::kIndex}, "Finished traversing through all the indexes");
- indexConsistency->addIndexEntryErrors(result);
+ indexValidator->addIndexEntryErrors(opCtx, result);
}
void _validateIndexKeyCount(OperationContext* opCtx,
@@ -556,8 +555,7 @@ Status validate(OperationContext* opCtx,
logAttrs(validateState.nss()),
logAttrs(validateState.uuid()));
- IndexConsistency indexConsistency(opCtx, &validateState);
- ValidateAdaptor indexValidator(&indexConsistency, &validateState);
+ ValidateAdaptor indexValidator(opCtx, &validateState);
// In traverseRecordStore(), the index validator keeps track of the records in the record
// store so that _validateIndexes() can confirm that the index entries match the records in
@@ -569,10 +567,10 @@ Status validate(OperationContext* opCtx,
// Pause collection validation while a lock is held and between collection and index data
// validation.
//
- // The IndexConsistency object saves document key information during collection data
- // validation and then compares against that key information during index data validation.
- // This fail point is placed in between them, in an attempt to catch any inconsistencies
- // that concurrent CRUD ops might cause if we were to have a bug.
+ // The KeyStringIndexConsistency object saves document key information during collection
+ // data validation and then compares against that key information during index data
+ // validation. This fail point is placed in between them, in an attempt to catch any
+ // inconsistencies that concurrent CRUD ops might cause if we were to have a bug.
//
// Only useful for background validation because we hold an intent lock instead of an
// exclusive lock, and thus allow concurrent operations.
@@ -592,14 +590,13 @@ Status validate(OperationContext* opCtx,
// Validate indexes and check for mismatches.
_validateIndexes(opCtx, &validateState, &indexValidator, results);
- if (indexConsistency.haveEntryMismatch()) {
+ if (indexValidator.haveEntryMismatch()) {
LOGV2_OPTIONS(20305,
{LogComponent::kIndex},
"Index inconsistencies were detected. "
"Starting the second phase of index validation to gather concise errors",
"namespace"_attr = validateState.nss());
- _gatherIndexEntryErrors(
- opCtx, &validateState, &indexConsistency, &indexValidator, results);
+ _gatherIndexEntryErrors(opCtx, &validateState, &indexValidator, results);
}
if (!results->valid) {
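
The net effect of this refactor is that validate() now talks only to the ValidateAdaptor, which owns the per-index-flavor consistency checkers and forwards the mismatch queries and phase transitions. A hedged sketch of that shape, with hypothetical types rather than the actual server classes:

```cpp
// Sketch of the adaptor fan-out introduced by this change (hypothetical
// names): callers query one adaptor, which aggregates over per-index-type
// consistency checkers (e.g. keystring and column-store).
#include <memory>
#include <vector>

struct IndexConsistencyChecker {
    virtual ~IndexConsistencyChecker() = default;
    virtual bool haveEntryMismatch() const = 0;
    virtual void setSecondPhase() = 0;
};

class Adaptor {
public:
    // A mismatch anywhere triggers the second (error-gathering) phase.
    bool haveEntryMismatch() const {
        for (const auto& checker : _checkers) {
            if (checker->haveEntryMismatch()) {
                return true;
            }
        }
        return false;
    }

    void setSecondPhase() {
        for (auto& checker : _checkers) {
            checker->setSecondPhase();
        }
    }

private:
    std::vector<std::unique_ptr<IndexConsistencyChecker>> _checkers;
};
```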
diff --git a/src/mongo/db/catalog/collection_validation_test.cpp b/src/mongo/db/catalog/collection_validation_test.cpp
index 15f8f475aec..19cfe013d13 100644
--- a/src/mongo/db/catalog/collection_validation_test.cpp
+++ b/src/mongo/db/catalog/collection_validation_test.cpp
@@ -27,13 +27,18 @@
* it in the license file.
*/
+#include <string>
+
#include "mongo/bson/util/builder.h"
#include "mongo/db/catalog/catalog_test_fixture.h"
#include "mongo/db/catalog/collection_validation.h"
#include "mongo/db/catalog/collection_write_path.h"
+#include "mongo/db/catalog/column_index_consistency.h"
#include "mongo/db/db_raii.h"
+#include "mongo/db/index/column_key_generator.h"
#include "mongo/db/index/index_access_method.h"
#include "mongo/db/operation_context.h"
+#include "mongo/idl/server_parameter_test_util.h"
#include "mongo/stdx/thread.h"
#include "mongo/unittest/unittest.h"
#include "mongo/util/bufreader.h"
@@ -53,7 +58,7 @@ private:
CatalogTestFixture::setUp();
// Create collection kNss for unit tests to use. It will possess a default _id index.
- CollectionOptions defaultCollectionOptions;
+ const CollectionOptions defaultCollectionOptions;
ASSERT_OK(storageInterface()->createCollection(
operationContext(), kNss, defaultCollectionOptions));
};
@@ -66,12 +71,13 @@ protected:
};
/**
- * Calls validate on collection kNss with both kValidateFull and kValidateNormal validation levels
+ * Calls validate on collection nss with both kValidateFull and kValidateNormal validation levels
* and verifies the results.
*
- * Returns list of validate results.
+ * Returns the list of validation results.
*/
std::vector<std::pair<BSONObj, ValidateResults>> foregroundValidate(
+ const NamespaceString& nss,
OperationContext* opCtx,
bool valid,
int numRecords,
@@ -86,7 +92,7 @@ std::vector<std::pair<BSONObj, ValidateResults>> foregroundValidate(
ValidateResults validateResults;
BSONObjBuilder output;
ASSERT_OK(CollectionValidation::validate(
- opCtx, kNss, mode, repairMode, &validateResults, &output));
+ opCtx, nss, mode, repairMode, &validateResults, &output));
BSONObj obj = output.obj();
BSONObjBuilder validateResultsBuilder;
validateResults.appendToResultObj(&validateResultsBuilder, true /* debugging */);
@@ -127,17 +133,21 @@ ValidateResults omitTransientWarnings(const ValidateResults& results) {
}
/**
- * Calls validate on collection kNss with {background:true} and verifies the results.
+ * Calls validate on collection nss with {background:true} and verifies the results.
* If 'runForegroundAsWell' is set, then foregroundValidate() above will be run in addition.
+ *
+ * Returns the list of validation results.
*/
-void backgroundValidate(OperationContext* opCtx,
- bool valid,
- int numRecords,
- int numInvalidDocuments,
- int numErrors,
- bool runForegroundAsWell) {
+std::vector<std::pair<BSONObj, ValidateResults>> backgroundValidate(const NamespaceString& nss,
+ OperationContext* opCtx,
+ bool valid,
+ int numRecords,
+ int numInvalidDocuments,
+ int numErrors,
+ bool runForegroundAsWell) {
+ std::vector<std::pair<BSONObj, ValidateResults>> res;
if (runForegroundAsWell) {
- foregroundValidate(opCtx, valid, numRecords, numInvalidDocuments, numErrors);
+ res = foregroundValidate(nss, opCtx, valid, numRecords, numInvalidDocuments, numErrors);
}
// This function will force a checkpoint, so background validation can then read from that
@@ -149,7 +159,7 @@ void backgroundValidate(OperationContext* opCtx,
ValidateResults validateResults;
BSONObjBuilder output;
ASSERT_OK(CollectionValidation::validate(opCtx,
- kNss,
+ nss,
CollectionValidation::ValidateMode::kBackground,
CollectionValidation::RepairMode::kNone,
&validateResults,
@@ -161,24 +171,32 @@ void backgroundValidate(OperationContext* opCtx,
ASSERT_EQ(obj.getIntField("nrecords"), numRecords);
ASSERT_EQ(obj.getIntField("nInvalidDocuments"), numInvalidDocuments);
+
+ res.push_back({std::make_pair(obj, validateResults)});
+
+ return res;
}
/**
- * Inserts a range of documents into the kNss collection and then returns that count.
- * The range is defined by [startIDNum, endIDNum), not inclusive of endIDNum, using the numbers as
- * values for '_id' of the document being inserted.
+ * Inserts a range of documents into the nss collection and then returns that count. The range is
+ * defined by [startIDNum, startIDNum+numDocs), not inclusive of (startIDNum+numDocs), using the
+ * numbers as values for '_id'; each inserted document additionally carries 'numFields' generated fields.
*/
-int insertDataRange(OperationContext* opCtx, int startIDNum, int endIDNum) {
- invariant(startIDNum < endIDNum,
- str::stream() << "attempted to insert invalid data range from " << startIDNum
- << " to " << endIDNum);
-
-
- AutoGetCollection coll(opCtx, kNss, MODE_IX);
+int insertDataRangeForNumFields(const NamespaceString& nss,
+ OperationContext* opCtx,
+ const int startIDNum,
+ const int numDocs,
+ const int numFields) {
+ const AutoGetCollection coll(opCtx, nss, MODE_IX);
std::vector<InsertStatement> inserts;
- for (int i = startIDNum; i < endIDNum; ++i) {
- auto doc = BSON("_id" << i);
- inserts.push_back(InsertStatement(doc));
+ for (int i = 0; i < numDocs; ++i) {
+ BSONObjBuilder bsonBuilder;
+ bsonBuilder << "_id" << i + startIDNum;
+ for (int c = 1; c <= numFields; ++c) {
+ bsonBuilder << "a" + std::to_string(c) << i + (i * numFields + startIDNum) + c;
+ }
+ const auto obj = bsonBuilder.obj();
+ inserts.push_back(InsertStatement(obj));
}
{
@@ -187,7 +205,20 @@ int insertDataRange(OperationContext* opCtx, int startIDNum, int endIDNum) {
opCtx, *coll, inserts.begin(), inserts.end(), nullptr, false));
wuow.commit();
}
- return endIDNum - startIDNum;
+ return numDocs;
+}
+
+/**
+ * Inserts a range of documents into the kNss collection and then returns that count. The range is
+ * defined by [startIDNum, endIDNum), not inclusive of endIDNum, using the numbers as values for
+ * '_id' of the document being inserted.
+ */
+int insertDataRange(OperationContext* opCtx, const int startIDNum, const int endIDNum) {
+ invariant(startIDNum < endIDNum,
+ str::stream() << "attempted to insert invalid data range from " << startIDNum
+ << " to " << endIDNum);
+
+ return insertDataRangeForNumFields(kNss, opCtx, startIDNum, endIDNum - startIDNum, 0);
}
/**
@@ -209,17 +240,301 @@ int setUpInvalidData(OperationContext* opCtx) {
return 1;
}
+class CollectionValidationColumnStoreIndexTest : public CollectionValidationDiskTest {
+protected:
+ CollectionValidationColumnStoreIndexTest() : CollectionValidationDiskTest() {}
+
+ const BSONObj kColumnStoreSpec = BSON("name"
+ << "$**_columnstore"
+ << "key"
+ << BSON("$**"
+ << "columnstore")
+ << "v" << 2);
+
+ /**
+ * This method decorates the execution of column-store index validation tests. It works by
+ * 1) creating an 'nss' namespace with a column-store index, 2) inserting documents into this
+ * namespace (via calling 'insertDocsFn'), 3) running validation on this collection and making
+ * sure it's valid, 4) running 'modifyIndexContentsFn', and 5) running 'postCheckFn', which
+ * usually contains an index validation call with relevant assertions.
+ *
+ * Returns the list of validation results.
+ */
+ std::vector<std::pair<BSONObj, ValidateResults>> runColumnStoreIndexTest(
+ const NamespaceString& nss,
+ std::function<int(OperationContext*, repl::StorageInterface*)> insertDocsFn,
+ std::function<void(OperationContext*, ColumnStore*, int)> modifyIndexContentsFn,
+ std::function<std::vector<std::pair<BSONObj, ValidateResults>>(OperationContext*, int)>
+ postCheckFn) {
+
+ RAIIServerParameterControllerForTest controller("featureFlagColumnstoreIndexes", true);
+
+ auto opCtx = operationContext();
+
+ {
+ // Durable catalog expects metadata updates to be timestamped but this is
+ // not necessary in our case - we just want to check the contents of the index table.
+ // The alternative here would be to provide a commit timestamp with a TimestampBlock.
+ repl::UnreplicatedWritesBlock uwb(opCtx);
+
+ ASSERT_OK(
+ storageInterface()->createIndexesOnEmptyCollection(opCtx, nss, {kColumnStoreSpec}));
+ }
+
+ const auto numRecords = insertDocsFn(opCtx, storageInterface());
+
+ // Validate the collection here as a sanity check before we modify the index contents
+ // in-place.
+ foregroundValidate(nss,
+ opCtx,
+ /*valid*/ true,
+ /*numRecords*/ numRecords,
+ /*numInvalidDocuments*/ 0,
+ /*numErrors*/ 0);
+
+ {
+ AutoGetCollection autoColl(opCtx, nss, MODE_IX);
+
+ const auto indexCatalog = autoColl->getIndexCatalog();
+ const auto descriptor = indexCatalog->findIndexByName(opCtx, "$**_columnstore");
+ ASSERT(descriptor) << "Cannot find $**_columnstore in index catalog";
+ const auto entry = indexCatalog->getEntry(descriptor);
+ ASSERT(entry) << "Cannot look up index catalog entry for index $**_columnstore";
+
+ const auto columnStore =
+ dynamic_cast<ColumnStoreAccessMethod*>(entry->accessMethod())->writableStorage();
+ ASSERT_FALSE(columnStore->isEmpty(opCtx))
+ << "index $**_columnstore should not be empty";
+
+ modifyIndexContentsFn(opCtx, columnStore, numRecords);
+ }
+
+ return postCheckFn(opCtx, numRecords);
+ }
+
+ /**
+ * Represents a fault where an index entry is deleted.
+ */
+ struct DeletionFault {};
+
+ /**
+ * Represents a fault where an index entry is additionally inserted.
+ */
+ struct InsertionFault {
+ // This value is inserted in the target index cell.
+ std::string insertedIndexValue;
+
+ InsertionFault(std::string iVal = "WRONG_KEY") : insertedIndexValue(iVal) {}
+ };
+
+ /**
+ * Represents a fault where the value of an index entry is replaced with a wrong value.
+ */
+ struct ReplacementFault {
+ // The actual index value is replaced with this given value in the target index cell.
+ std::string updatedIndexValue;
+
+ ReplacementFault(std::string uVal = "WRONG_KEY") : updatedIndexValue(uVal) {}
+ };
+
+ /**
+ * Represents an index corruption in field represented by 'fieldIndex' for a document with id
+ * equals to 'docIndex'. The actual fault can be one of the 'DeletionFault', 'InsertionFault',
+ * or 'ReplacementFault' options.
+ */
+ struct InjectedCorruption {
+ int fieldIndex;
+ int docIndex;
+ std::variant<DeletionFault, InsertionFault, ReplacementFault> fault;
+
+ InjectedCorruption(int fieldIdx,
+ int docIdx,
+ std::variant<DeletionFault, InsertionFault, ReplacementFault> f)
+ : fieldIndex(fieldIdx), docIndex(docIdx), fault(f) {}
+
+
+ /**
+ * Returns a field index for a field that doesn't exist. As field indexes are assumed to be
+ * non-negative, it's guaranteed that a negative field index does not exist. 'fldId' is used
+ * to produce more specific negative numbers for the non-existing field index to help with
+ * pinpointing the test failures.
+ */
+ static int getNonExistentFieldIndex(const int fldId) {
+ return -fldId;
+ }
+ };
+
+ int docIndexToRowId(const int docIndex) {
+ return docIndex + 1L;
+ }
+
+ /**
+ * This method runs a column-store index test on 'nss' via a call to 'runColumnStoreIndexTest'.
+ * First, it populates an 'nss' collection (with a column-store index defined on it) with
+ * 'numDocs' documents, where each document has 'numFields' fields. Then, applies a series of
+ * 'corruptions' on the column-store index. Finally, the index validation is ran on this
+ * collection (which now has a corrupted column-store index) and the validation results are
+ * returned.
+ *
+ * Note: passing 'doBackgroundValidation = true' performs both foreground and background
+ * validations. However, this can only be done in unit-tests that have a single call to
+ * this method.
+ *
+ * Returns the list of validation results.
+ */
+ std::vector<std::pair<BSONObj, ValidateResults>> validateIndexCorruptions(
+ const NamespaceString& nss,
+ const int numFields,
+ const int numDocs,
+ const std::vector<InjectedCorruption> corruptions,
+ const bool doBackgroundValidation = false) {
+ return runColumnStoreIndexTest(
+ nss,
+ /* insertDocsFn */
+ [&](OperationContext* opCtx, repl::StorageInterface* storageInterface) -> int {
+ return insertDataRangeForNumFields(
+ nss, opCtx, /*startIDNum*/ 1, /*numDocs*/ numDocs, /*numFields*/ numFields);
+ },
+ // modifyIndexContentsFn: For each corruption specified, introduces the corruption and
+ // then in a separate transaction ensures the corruption is now present.
+ [&](OperationContext* opCtx, ColumnStore* columnStore, int numRecords) -> void {
+ const auto getPath = [](int corruptedFldIndex) -> std::string {
+ return "a" + std::to_string(corruptedFldIndex);
+ };
+ const auto seekToCorruptedIndexEntry =
+ [&](int corruptedFldIndex,
+ int corruptedDocIndex) -> boost::optional<FullCellValue> {
+ auto cursor = columnStore->newCursor(opCtx, getPath(corruptedFldIndex));
+ ASSERT(cursor);
+ const auto res =
+ cursor->seekAtOrPast(RowId(docIndexToRowId(corruptedDocIndex)));
+ return res ? FullCellValue(res.value()) : boost::optional<FullCellValue>();
+ };
+
+ for (const auto& corruption : corruptions) {
+ const int corruptedFldIndex = corruption.fieldIndex;
+ const int corruptedDocIndex = corruption.docIndex;
+
+ const auto preCorruptionCell =
+ seekToCorruptedIndexEntry(corruptedFldIndex, corruptedDocIndex);
+ // Apply the requested corruption in a transaction.
+ {
+ WriteUnitOfWork wuow(opCtx);
+ const auto cursor = columnStore->newWriteCursor(opCtx);
+ if (std::holds_alternative<ReplacementFault>(corruption.fault)) {
+ const auto toVal =
+ std::get<ReplacementFault>(corruption.fault).updatedIndexValue;
+ columnStore->update(opCtx,
+ getPath(corruptedFldIndex),
+ preCorruptionCell->rid,
+ StringData(toVal));
+ } else if (std::holds_alternative<DeletionFault>(corruption.fault)) {
+ columnStore->remove(
+ opCtx, getPath(corruptedFldIndex), preCorruptionCell->rid);
+ } else if (std::holds_alternative<InsertionFault>(corruption.fault)) {
+ const auto toVal =
+ std::get<InsertionFault>(corruption.fault).insertedIndexValue;
+ columnStore->insert(opCtx,
+ getPath(corruptedFldIndex),
+ RowId(docIndexToRowId(corruptedDocIndex)),
+ StringData(toVal));
+ } else {
+ MONGO_UNREACHABLE;
+ }
+ wuow.commit();
+ }
+
+ // Confirm the requested corruption is actually applied (in a separate
+ // transaction).
+ {
+
+ if (std::holds_alternative<ReplacementFault>(corruption.fault)) {
+ const auto toVal =
+ std::get<ReplacementFault>(corruption.fault).updatedIndexValue;
+ const auto corruptedCell =
+ seekToCorruptedIndexEntry(corruptedFldIndex, corruptedDocIndex);
+ ASSERT_EQ(corruptedCell->path, getPath(corruptedFldIndex));
+ ASSERT_EQ(corruptedCell->rid, preCorruptionCell->rid);
+ ASSERT_EQ(corruptedCell->value, StringData(toVal));
+ } else if (std::holds_alternative<DeletionFault>(corruption.fault)) {
+ const auto corruptedCell =
+ seekToCorruptedIndexEntry(corruptedFldIndex, corruptedDocIndex);
+ if (numDocs == 1 || corruptedDocIndex == numDocs - 1) {
+ ASSERT_FALSE(corruptedCell);
+ } else {
+ ASSERT_EQ(corruptedCell->path, getPath(corruptedFldIndex));
+ ASSERT_GT(corruptedCell->rid, preCorruptionCell->rid);
+ }
+ } else if (std::holds_alternative<InsertionFault>(corruption.fault)) {
+ const auto toVal =
+ std::get<InsertionFault>(corruption.fault).insertedIndexValue;
+ const auto corruptedCell =
+ seekToCorruptedIndexEntry(corruptedFldIndex, corruptedDocIndex);
+ ASSERT_EQ(corruptedCell->path, getPath(corruptedFldIndex));
+ ASSERT_EQ(corruptedCell->rid,
+ RowId(docIndexToRowId(corruptedDocIndex)));
+ ASSERT_EQ(corruptedCell->value, StringData(toVal));
+ } else {
+ MONGO_UNREACHABLE;
+ }
+ }
+ }
+ },
+ /* postCheckFn */
+ [&](OperationContext* opCtx,
+ int numRecords) -> std::vector<std::pair<BSONObj, ValidateResults>> {
+ auto serviceContext = opCtx->getServiceContext();
+
+ // Confirm there is an expected validation error
+ std::vector<std::pair<BSONObj, ValidateResults>> results;
+
+ if (doBackgroundValidation) {
+ // Background validation must be done in a separate thread due to the
+ // assumptions made in its implementation.
+ stdx::thread runBackgroundValidate =
+ stdx::thread([&serviceContext, &numRecords, &nss, &results] {
+ ThreadClient tc("BackgroundValidate-thread", serviceContext);
+ auto threadOpCtx = tc->makeOperationContext();
+ auto bgResults = backgroundValidate(nss,
+ threadOpCtx.get(),
+ /*valid*/ false,
+ /*numRecords*/ numRecords,
+ /*numInvalidDocuments*/ 0,
+ /*numErrors*/ 1,
+ /*runForegroundAsWell*/ false);
+ results.insert(results.end(), bgResults.begin(), bgResults.end());
+ });
+ // Make sure the background validation finishes successfully.
+ runBackgroundValidate.join();
+ }
+
+ const auto fgResults = foregroundValidate(nss,
+ opCtx,
+ /*valid*/ false,
+ /*numRecords*/ numRecords,
+ /*numInvalidDocuments*/ 0,
+ /*numErrors*/ 1);
+
+ results.insert(results.end(), fgResults.begin(), fgResults.end());
+
+ return results;
+ });
+ }
+};
+
// Verify that calling validate() on an empty collection with different validation levels returns an
// OK status.
TEST_F(CollectionValidationTest, ValidateEmpty) {
- foregroundValidate(operationContext(),
+ foregroundValidate(kNss,
+ operationContext(),
/*valid*/ true,
/*numRecords*/ 0,
/*numInvalidDocuments*/ 0,
/*numErrors*/ 0);
}
TEST_F(CollectionValidationDiskTest, BackgroundValidateEmpty) {
- backgroundValidate(operationContext(),
+ backgroundValidate(kNss,
+ operationContext(),
/*valid*/ true,
/*numRecords*/ 0,
/*numInvalidDocuments*/ 0,
@@ -230,7 +545,8 @@ TEST_F(CollectionValidationDiskTest, BackgroundValidateEmpty) {
// Verify calling validate() on a nonempty collection with different validation levels.
TEST_F(CollectionValidationTest, Validate) {
auto opCtx = operationContext();
- foregroundValidate(opCtx,
+ foregroundValidate(kNss,
+ opCtx,
/*valid*/ true,
/*numRecords*/ insertDataRange(opCtx, 0, 5),
/*numInvalidDocuments*/ 0,
@@ -238,7 +554,8 @@ TEST_F(CollectionValidationTest, Validate) {
}
TEST_F(CollectionValidationDiskTest, BackgroundValidate) {
auto opCtx = operationContext();
- backgroundValidate(opCtx,
+ backgroundValidate(kNss,
+ opCtx,
/*valid*/ true,
/*numRecords*/ insertDataRange(opCtx, 0, 5),
/*numInvalidDocuments*/ 0,
@@ -249,7 +566,8 @@ TEST_F(CollectionValidationDiskTest, BackgroundValidate) {
// Verify calling validate() on a collection with an invalid document.
TEST_F(CollectionValidationTest, ValidateError) {
auto opCtx = operationContext();
- foregroundValidate(opCtx,
+ foregroundValidate(kNss,
+ opCtx,
/*valid*/ false,
/*numRecords*/ setUpInvalidData(opCtx),
/*numInvalidDocuments*/ 1,
@@ -257,7 +575,8 @@ TEST_F(CollectionValidationTest, ValidateError) {
}
TEST_F(CollectionValidationDiskTest, BackgroundValidateError) {
auto opCtx = operationContext();
- backgroundValidate(opCtx,
+ backgroundValidate(kNss,
+ opCtx,
/*valid*/ false,
/*numRecords*/ setUpInvalidData(opCtx),
/*numInvalidDocuments*/ 1,
@@ -268,7 +587,8 @@ TEST_F(CollectionValidationDiskTest, BackgroundValidateError) {
// Verify calling validate() with enforceFastCount=true.
TEST_F(CollectionValidationTest, ValidateEnforceFastCount) {
auto opCtx = operationContext();
- foregroundValidate(opCtx,
+ foregroundValidate(kNss,
+ opCtx,
/*valid*/ true,
/*numRecords*/ insertDataRange(opCtx, 0, 5),
/*numInvalidDocuments*/ 0,
@@ -306,7 +626,7 @@ TEST_F(CollectionValidationDiskTest, BackgroundValidateRunsConcurrentlyWithWrite
ThreadClient tc("BackgroundValidateConcurrentWithCRUD-thread", serviceContext);
auto threadOpCtx = tc->makeOperationContext();
backgroundValidate(
- threadOpCtx.get(), true, numRecords, 0, 0, /*runForegroundAsWell*/ false);
+ kNss, threadOpCtx.get(), true, numRecords, 0, 0, /*runForegroundAsWell*/ false);
});
// Wait until validate starts and hangs mid-way on a failpoint, then do concurrent writes,
@@ -319,7 +639,8 @@ TEST_F(CollectionValidationDiskTest, BackgroundValidateRunsConcurrentlyWithWrite
runBackgroundValidate.join();
// Run regular foreground collection validation to make sure everything is OK.
- foregroundValidate(opCtx,
+ foregroundValidate(kNss,
+ opCtx,
/*valid*/ true,
/*numRecords*/ numRecords + numRecords2,
0,
@@ -376,7 +697,7 @@ TEST_F(CollectionValidationTest, ValidateOldUniqueIndexKeyWarning) {
// Validate the collection here as a sanity check before we modify the index contents in-place.
foregroundValidate(
- opCtx, /*valid*/ true, /*numRecords*/ 1, /*numInvalidDocuments*/ 0, /*numErrors*/ 0);
+ kNss, opCtx, /*valid*/ true, /*numRecords*/ 1, /*numInvalidDocuments*/ 0, /*numErrors*/ 0);
// Update existing entry in index to pre-4.2 format without record id in key string.
{
@@ -429,21 +750,22 @@ TEST_F(CollectionValidationTest, ValidateOldUniqueIndexKeyWarning) {
}
}
- auto results = foregroundValidate(opCtx,
- /*valid*/ true,
- /*numRecords*/ 1,
- /*numInvalidDocuments*/ 0,
- /*numErrors*/ 0);
+ const auto results = foregroundValidate(kNss,
+ opCtx,
+ /*valid*/ true,
+ /*numRecords*/ 1,
+ /*numInvalidDocuments*/ 0,
+ /*numErrors*/ 0);
ASSERT_EQ(results.size(), 2);
for (const auto& result : results) {
const auto& validateResults = result.second;
BSONObjBuilder builder;
- bool debugging = true;
+ const bool debugging = true;
validateResults.appendToResultObj(&builder, debugging);
- auto obj = builder.obj();
+ const auto obj = builder.obj();
ASSERT(validateResults.valid) << obj;
- auto warningsWithoutTransientErrors = omitTransientWarnings(validateResults);
+ const auto warningsWithoutTransientErrors = omitTransientWarnings(validateResults);
ASSERT_EQ(warningsWithoutTransientErrors.warnings.size(), 1U) << obj;
ASSERT_STRING_CONTAINS(warningsWithoutTransientErrors.warnings[0],
"Unique index a_1 has one or more keys in the old format")
@@ -451,5 +773,353 @@ TEST_F(CollectionValidationTest, ValidateOldUniqueIndexKeyWarning) {
}
}
+/**
+ * Checks whether a given 'entry' is equal to any element in the 'list'.
+ */
+bool equalsAny(const std::string& entry, const std::vector<std::string>& list) {
+ return std::any_of(
+ list.begin(), list.end(), [&entry](const std::string& other) { return entry == other; });
+}
+
+// Exhaustively tests having one error in the column-store index by updating one index entry with an
+// invalid value in different parts of the index on collections with different numbers of columns and
+// documents.
+TEST_F(CollectionValidationColumnStoreIndexTest, SingleInvalidIndexEntryCSI) {
+ const int kNumFields = 4;
+ const int kMaxNumDocs = 4;
+
+ int testCaseIdx = 0;
+ for (int numFields = 1; numFields <= kNumFields; ++numFields) {
+ for (int numDocs = 1; numDocs <= kMaxNumDocs; ++numDocs) {
+ for (int corruptedFldIndex = 1; corruptedFldIndex <= numFields; ++corruptedFldIndex) {
+ for (int corruptedDocIndex = 0; corruptedDocIndex < numDocs; ++corruptedDocIndex) {
+ const NamespaceString nss(kNss.toString() + std::to_string(++testCaseIdx));
+
+ // Create collection nss for unit tests to use.
+ const CollectionOptions defaultCollectionOptions;
+ ASSERT_OK(storageInterface()->createCollection(
+ operationContext(), nss, defaultCollectionOptions));
+
+ const auto results = validateIndexCorruptions(
+ nss,
+ numFields,
+ numDocs,
+ /* column-store index corruptions */
+ {{corruptedFldIndex,
+ corruptedDocIndex,
+ /* Update the current index entry with an invalid value. */
+ ReplacementFault("WRONG_" + std::to_string(corruptedFldIndex) + "_" +
+ std::to_string(corruptedDocIndex))}},
+ /* doBackgroundValidation */ true);
+
+ ASSERT_EQ(results.size(), 3);
+
+ for (const auto& result : results) {
+ const auto& validateResults = result.second;
+ BSONObjBuilder builder;
+ const bool debugging = true;
+ validateResults.appendToResultObj(&builder, debugging);
+ const auto obj = builder.obj();
+ ASSERT_FALSE(validateResults.valid) << obj;
+
+ const auto warningsWithoutTransientErrors =
+ omitTransientWarnings(validateResults);
+ ASSERT_EQ(warningsWithoutTransientErrors.warnings.size(), 2U) << obj;
+ ASSERT(equalsAny("Detected 1 missing index entries.",
+ warningsWithoutTransientErrors.warnings))
+ << obj;
+ ASSERT(equalsAny("Detected 1 extra index entries.",
+ warningsWithoutTransientErrors.warnings))
+ << obj;
+
+ ASSERT_EQ(validateResults.errors.size(), 1U) << obj;
+ ASSERT_EQ(validateResults.errors[0],
+ "Index with name '$**_columnstore' has inconsistencies.")
+ << obj;
+
+ const auto& extraEntries = validateResults.extraIndexEntries;
+ ASSERT_EQ(extraEntries.size(), 1U) << obj;
+ ASSERT_EQ(extraEntries[0]["indexName"].String(), "$**_columnstore") << obj;
+ ASSERT_EQ(extraEntries[0]["recordId"].Long(),
+ docIndexToRowId(corruptedDocIndex))
+ << obj;
+ ASSERT_EQ(extraEntries[0]["rowId"].Long(),
+ docIndexToRowId(corruptedDocIndex))
+ << obj;
+ ASSERT_EQ(extraEntries[0]["indexPath"].String(),
+ "a" + std::to_string(corruptedFldIndex))
+ << obj;
+
+ const auto& missingEntries = validateResults.missingIndexEntries;
+ ASSERT_EQ(missingEntries.size(), 1U) << obj;
+ ASSERT_EQ(missingEntries[0]["indexName"].String(), "$**_columnstore")
+ << obj;
+ ASSERT_EQ(missingEntries[0]["recordId"].Long(),
+ docIndexToRowId(corruptedDocIndex))
+ << obj;
+ ASSERT_EQ(missingEntries[0]["rowId"].Long(),
+ docIndexToRowId(corruptedDocIndex))
+ << obj;
+ ASSERT_EQ(missingEntries[0]["indexPath"].String(),
+ "a" + std::to_string(corruptedFldIndex))
+ << obj;
+
+ ASSERT_EQ(validateResults.corruptRecords.size(), 0U) << obj;
+ ASSERT_EQ(validateResults.numRemovedCorruptRecords, 0U) << obj;
+ ASSERT_EQ(validateResults.numRemovedExtraIndexEntries, 0U) << obj;
+ ASSERT_EQ(validateResults.numInsertedMissingIndexEntries, 0U) << obj;
+ ASSERT_EQ(validateResults.numDocumentsMovedToLostAndFound, 0U) << obj;
+ ASSERT_EQ(validateResults.numOutdatedMissingIndexEntry, 0U) << obj;
+ }
+ }
+ }
+ }
+ }
+}
+
+// Exhaustively tests having one error in the column-store index by adding an extra index entry on
+// collections with different numbers of columns and documents.
+TEST_F(CollectionValidationColumnStoreIndexTest, SingleExtraIndexEntry) {
+ const int kNumFields = 4;
+ const int kMaxNumDocs = 4;
+
+ std::vector<std::pair<int, int>> extraIndexEntryCorruptions = {
+ /* non-existent field on an existing document */
+ {/*corruptedFldIndex*/ InjectedCorruption::getNonExistentFieldIndex(1),
+ /*corruptedDocIndex*/ 0},
+ /* non-existent field on a non-existent document */
+ {/*corruptedFldIndex*/ InjectedCorruption::getNonExistentFieldIndex(2),
+ /*corruptedDocIndex*/ kMaxNumDocs * 10}};
+ for (int corruptedFldIndex = 1; corruptedFldIndex <= kNumFields; ++corruptedFldIndex) {
+ /* existing field on a non-existent document */
+ extraIndexEntryCorruptions.push_back(
+ {corruptedFldIndex, /*corruptedDocIndex*/ kMaxNumDocs * 10});
+ }
+
+ int testCaseIdx = 0;
+ for (int numFields = 1; numFields <= kNumFields; ++numFields) {
+ for (int numDocs = 1; numDocs <= kMaxNumDocs; ++numDocs) {
+ for (const auto& corruption : extraIndexEntryCorruptions) {
+ const int corruptedFldIndex = corruption.first;
+ const int corruptedDocIndex = corruption.second;
+
+ const auto nss = NamespaceString(kNss.toString() + std::to_string(++testCaseIdx));
+
+ // Create collection nss for unit tests to use.
+ const CollectionOptions defaultCollectionOptions;
+ ASSERT_OK(storageInterface()->createCollection(
+ operationContext(), nss, defaultCollectionOptions));
+
+ const auto results = validateIndexCorruptions(
+ nss,
+ numFields,
+ numDocs,
+ /* column-store index corruptions */
+ {{corruptedFldIndex,
+ corruptedDocIndex,
+ /* Insert an extra index entry. */
+ InsertionFault("WRONG_" + std::to_string(corruptedFldIndex) + "_" +
+ std::to_string(corruptedDocIndex))}},
+ /* doBackgroundValidation */ true);
+
+ ASSERT_EQ(results.size(), 3);
+
+ for (const auto& result : results) {
+ const auto& validateResults = result.second;
+ BSONObjBuilder builder;
+ const bool debugging = true;
+ validateResults.appendToResultObj(&builder, debugging);
+ const auto obj = builder.obj();
+ ASSERT_FALSE(validateResults.valid) << obj;
+
+ const auto warningsWithoutTransientErrors =
+ omitTransientWarnings(validateResults);
+ ASSERT_EQ(warningsWithoutTransientErrors.warnings.size(), 1U) << obj;
+ ASSERT(fmt::format("Detected {} extra index entries.", 1) ==
+ warningsWithoutTransientErrors.warnings[0])
+ << obj;
+
+ ASSERT_EQ(validateResults.errors.size(), 1U) << obj;
+ ASSERT_EQ(validateResults.errors[0],
+ "Index with name '$**_columnstore' has inconsistencies.")
+ << obj;
+
+ const auto& extraEntries = validateResults.extraIndexEntries;
+ ASSERT_EQ(extraEntries.size(), 1) << obj;
+ ASSERT_EQ(extraEntries[0]["indexName"].String(), "$**_columnstore") << obj;
+ ASSERT_EQ(extraEntries[0]["recordId"].Long(),
+ docIndexToRowId(corruptedDocIndex))
+ << obj;
+ ASSERT_EQ(extraEntries[0]["rowId"].Long(), docIndexToRowId(corruptedDocIndex))
+ << obj;
+ ASSERT_EQ(extraEntries[0]["indexPath"].String(),
+ "a" + std::to_string(corruptedFldIndex))
+ << obj;
+
+ ASSERT_EQ(validateResults.missingIndexEntries.size(), 0U) << obj;
+ ASSERT_EQ(validateResults.corruptRecords.size(), 0U) << obj;
+ ASSERT_EQ(validateResults.numRemovedCorruptRecords, 0U) << obj;
+ ASSERT_EQ(validateResults.numRemovedExtraIndexEntries, 0U) << obj;
+ ASSERT_EQ(validateResults.numInsertedMissingIndexEntries, 0U) << obj;
+ ASSERT_EQ(validateResults.numDocumentsMovedToLostAndFound, 0U) << obj;
+ ASSERT_EQ(validateResults.numOutdatedMissingIndexEntry, 0U) << obj;
+ }
+ }
+ }
+ }
+}
+
+// Exhaustively tests having one error in the column-store index by removing one index entry from
+// different parts of the index on collections with different numbers of columns and documents.
+TEST_F(CollectionValidationColumnStoreIndexTest, SingleMissingIndexEntryCSI) {
+ const int kNumFields = 4;
+ const int kMaxNumDocs = 4;
+
+ int testCaseIdx = 0;
+ for (int numFields = 1; numFields <= kNumFields; ++numFields) {
+ for (int numDocs = 1; numDocs <= kMaxNumDocs; ++numDocs) {
+ for (int corruptedFldIndex = 1; corruptedFldIndex <= numFields; ++corruptedFldIndex) {
+ for (int corruptedDocIndex = 0; corruptedDocIndex < numDocs; ++corruptedDocIndex) {
+ const NamespaceString nss(kNss.toString() + std::to_string(++testCaseIdx));
+
+ // Create collection nss for unit tests to use.
+ const CollectionOptions defaultCollectionOptions;
+ ASSERT_OK(storageInterface()->createCollection(
+ operationContext(), nss, defaultCollectionOptions));
+
+ const auto results =
+ validateIndexCorruptions(nss,
+ numFields,
+ numDocs,
+ /* column-store index corruptions */
+ {{corruptedFldIndex,
+ corruptedDocIndex,
+ /* Remove the existing index entry. */
+ DeletionFault()}},
+ /* doBackgroundValidation */ true);
+
+ ASSERT_EQ(results.size(), 3);
+
+ for (const auto& result : results) {
+ const auto& validateResults = result.second;
+ BSONObjBuilder builder;
+ const bool debugging = true;
+ validateResults.appendToResultObj(&builder, debugging);
+ const auto obj = builder.obj();
+ ASSERT_FALSE(validateResults.valid) << obj;
+
+ const auto warningsWithoutTransientErrors =
+ omitTransientWarnings(validateResults);
+ ASSERT_EQ(warningsWithoutTransientErrors.warnings.size(), 1U) << obj;
+ ASSERT("Detected 1 missing index entries." ==
+ warningsWithoutTransientErrors.warnings[0])
+ << obj;
+
+ ASSERT_EQ(validateResults.errors.size(), 1U) << obj;
+ ASSERT_EQ(validateResults.errors[0],
+ "Index with name '$**_columnstore' has inconsistencies.")
+ << obj;
+
+ const auto& missingEntries = validateResults.missingIndexEntries;
+ ASSERT_EQ(missingEntries.size(), 1U) << obj;
+ ASSERT_EQ(missingEntries[0]["indexName"].String(), "$**_columnstore")
+ << obj;
+ ASSERT_EQ(missingEntries[0]["recordId"].Long(),
+ docIndexToRowId(corruptedDocIndex))
+ << obj;
+ ASSERT_EQ(missingEntries[0]["rowId"].Long(),
+ docIndexToRowId(corruptedDocIndex))
+ << obj;
+ ASSERT_EQ(missingEntries[0]["indexPath"].String(),
+ "a" + std::to_string(corruptedFldIndex))
+ << obj;
+
+ ASSERT_EQ(validateResults.extraIndexEntries.size(), 0U) << obj;
+ ASSERT_EQ(validateResults.corruptRecords.size(), 0U) << obj;
+ ASSERT_EQ(validateResults.numRemovedCorruptRecords, 0U) << obj;
+ ASSERT_EQ(validateResults.numRemovedExtraIndexEntries, 0U) << obj;
+ ASSERT_EQ(validateResults.numInsertedMissingIndexEntries, 0U) << obj;
+ ASSERT_EQ(validateResults.numDocumentsMovedToLostAndFound, 0U) << obj;
+ ASSERT_EQ(validateResults.numOutdatedMissingIndexEntry, 0U) << obj;
+ }
+ }
+ }
+ }
+ }
+}
+
+// Tests having multiple errors in the column-store index by updating several index entries with an
+// invalid value in different parts of the index on a collection with multiple columns and multiple
+// documents.
+TEST_F(CollectionValidationColumnStoreIndexTest, MultipleInvalidIndexEntryCSI) {
+ const int numFields = 10;
+ const int numDocs = 50;
+
+ auto results = validateIndexCorruptions(
+ kNss,
+ numFields,
+ numDocs,
+ /* column-store index corruptions */
+ {{/* corruptedFldIndex */ 5,
+ /* corruptedDocIndex */ 9,
+ /* Remove the existing index entry. */
+ DeletionFault()},
+ {/* corruptedFldIndex */ 7,
+ /* corruptedDocIndex */ 2,
+ /* Update the current index entry with an invalid value. */
+ ReplacementFault()},
+ {/* corruptedFldIndex */ 9,
+ /* corruptedDocIndex */ 500,
+ /* Insert an extra index entry for a non-existing document. */
+ InsertionFault()},
+ {/* corruptedFldIndex */ InjectedCorruption::getNonExistentFieldIndex(1),
+ /* corruptedDocIndex */ 5,
+ /* Insert an extra index entry for a non-existing field of an existing document. */
+ InsertionFault()},
+ {/* corruptedFldIndex */ InjectedCorruption::getNonExistentFieldIndex(2),
+ /* corruptedDocIndex */ 600,
+ /* Insert an extra index entry for a non-existing field of a non-existing document. */
+ InsertionFault()},
+ {/* corruptedFldIndex */ 2,
+ /* corruptedDocIndex */ 33,
+ /* Update the current index entry with an invalid value. */
+ ReplacementFault()}},
+ /* doBackgroundValidation */ true);
+
+ ASSERT_EQ(results.size(), 3);
+
+ for (const auto& result : results) {
+ const auto& validateResults = result.second;
+ BSONObjBuilder builder;
+ const bool debugging = true;
+ validateResults.appendToResultObj(&builder, debugging);
+ const auto obj = builder.obj();
+ ASSERT_FALSE(validateResults.valid) << obj;
+
+ const auto warningsWithoutTransientErrors = omitTransientWarnings(validateResults);
+ ASSERT_EQ(warningsWithoutTransientErrors.warnings.size(), 2U) << obj;
+ ASSERT(
+ equalsAny("Detected 3 missing index entries.", warningsWithoutTransientErrors.warnings))
+ << obj;
+ ASSERT(
+ equalsAny("Detected 5 extra index entries.", warningsWithoutTransientErrors.warnings))
+ << obj;
+
+ ASSERT_EQ(validateResults.errors.size(), 1U) << obj;
+ ASSERT_EQ(validateResults.errors[0],
+ "Index with name '$**_columnstore' has inconsistencies.")
+ << obj;
+
+ ASSERT_EQ(validateResults.missingIndexEntries.size(), 3U) << obj;
+ ASSERT_EQ(validateResults.extraIndexEntries.size(), 5U) << obj;
+ ASSERT_EQ(validateResults.corruptRecords.size(), 0U) << obj;
+ ASSERT_EQ(validateResults.numRemovedCorruptRecords, 0U) << obj;
+ ASSERT_EQ(validateResults.numRemovedExtraIndexEntries, 0U) << obj;
+ ASSERT_EQ(validateResults.numInsertedMissingIndexEntries, 0U) << obj;
+ ASSERT_EQ(validateResults.numDocumentsMovedToLostAndFound, 0U) << obj;
+ ASSERT_EQ(validateResults.numOutdatedMissingIndexEntry, 0U) << obj;
+ }
+}
+
} // namespace
} // namespace mongo
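
The corruption taxonomy in the tests above is driven by a std::variant of fault types, dispatched with std::holds_alternative/std::get. Stripped of the column-store plumbing, the pattern reduces to the following self-contained sketch (hypothetical, for illustration only):

```cpp
#include <iostream>
#include <string>
#include <variant>

struct DeletionFault {};
struct InsertionFault {
    std::string value = "WRONG_KEY";
};
struct ReplacementFault {
    std::string value = "WRONG_KEY";
};

using Fault = std::variant<DeletionFault, InsertionFault, ReplacementFault>;

// Dispatch on the active alternative, mirroring how the tests choose
// between remove/insert/update operations on the column store.
void applyFault(const Fault& fault) {
    if (std::holds_alternative<DeletionFault>(fault)) {
        std::cout << "delete the index entry\n";
    } else if (std::holds_alternative<InsertionFault>(fault)) {
        std::cout << "insert extra entry: " << std::get<InsertionFault>(fault).value << '\n';
    } else {
        std::cout << "replace entry with: " << std::get<ReplacementFault>(fault).value << '\n';
    }
}

int main() {
    applyFault(DeletionFault{});
    applyFault(InsertionFault{});
    applyFault(ReplacementFault{"WRONG_5_9"});
}
```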
diff --git a/src/mongo/db/catalog/column_index_consistency.cpp b/src/mongo/db/catalog/column_index_consistency.cpp
new file mode 100644
index 00000000000..dfa6d65b7e6
--- /dev/null
+++ b/src/mongo/db/catalog/column_index_consistency.cpp
@@ -0,0 +1,282 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/catalog/column_index_consistency.h"
+#include "mongo/db/concurrency/exception_util.h"
+
+namespace mongo {
+
+int64_t ColumnIndexConsistency::traverseIndex(OperationContext* opCtx,
+ const IndexCatalogEntry* index,
+ ProgressMeterHolder& _progress,
+ ValidateResults* results) {
+ const auto indexName = index->descriptor()->indexName();
+
+ // Ensure that this index has an open index cursor.
+ const auto csiCursorIt = _validateState->getColumnStoreCursors().find(indexName);
+ invariant(csiCursorIt != _validateState->getColumnStoreCursors().end());
+
+ const std::unique_ptr<ColumnStore::Cursor>& csiCursor = csiCursorIt->second;
+
+ int64_t numIndexEntries = 0;
+ // Traverse the index in this for loop.
+ for (auto cell = csiCursor->seekAtOrPast("", ColumnStore::kNullRowId); cell;
+ cell = csiCursor->next()) {
+ {
+ stdx::unique_lock<Client> lk(*opCtx->getClient());
+ _progress.get(lk)->hit();
+ }
+ ++numIndexEntries;
+
+ if (numIndexEntries % kInterruptIntervalNumRecords == 0) {
+ // Periodically checks for interrupts and yields.
+ opCtx->checkForInterrupt();
+ _validateState->yield(opCtx);
+ }
+
+ if (_firstPhase) {
+ addIndexEntry(cell.get());
+ } else {
+ _updateSuspectList(cell.get(), results);
+ }
+ }
+
+ return numIndexEntries;
+}
+
+void ColumnIndexConsistency::traverseRecord(OperationContext* opCtx,
+ const CollectionPtr& coll,
+ const IndexCatalogEntry* index,
+ const RecordId& recordId,
+ const BSONObj& record,
+ ValidateResults* results) {
+ ColumnStoreAccessMethod* csam = dynamic_cast<ColumnStoreAccessMethod*>(index->accessMethod());
+
+ // Shred the record.
+ csam->getKeyGen().visitCellsForInsert(
+ record, [&](PathView path, const column_keygen::UnencodedCellView& unencodedCell) {
+ _cellBuilder.reset();
+ column_keygen::writeEncodedCell(unencodedCell, &_cellBuilder);
+ tassert(
+ 7106112, "RecordID cannot be a string for column store indexes", !recordId.isStr());
+ const auto cell = FullCellView{
+ path, recordId.getLong(), CellView(_cellBuilder.buf(), _cellBuilder.len())};
+ if (_firstPhase) {
+ addDocEntry(cell);
+ } else {
+ _updateSuspectList(cell, results);
+ }
+ });
+}
+
+void ColumnIndexConsistency::_addIndexEntryErrors(OperationContext* opCtx,
+ const IndexCatalogEntry* index,
+ ValidateResults* results) {
+
+ if (_firstPhase) {
+ return;
+ }
+
+ const auto indexName = index->descriptor()->indexName();
+
+ // Ensure that this index has an open index cursor.
+ const auto csiCursorIt = _validateState->getColumnStoreCursors().find(indexName);
+ invariant(csiCursorIt != _validateState->getColumnStoreCursors().end());
+
+ const auto& csiCursor = csiCursorIt->second;
+
+ const ColumnStoreAccessMethod* csam =
+ dynamic_cast<ColumnStoreAccessMethod*>(index->accessMethod());
+
+ for (const auto rowId : _suspects) {
+ // Gather all paths for this RecordId.
+ StringMap<int> paths;
+ PathValue nextPath = "";
+ while (auto next = csiCursor->seekAtOrPast(nextPath, rowId)) {
+ if (next->rid < rowId) {
+ nextPath.assign(next->path.rawData(), next->path.size());
+ continue;
+ }
+
+ if (next->rid == rowId) {
+ paths[next->path]++;
+ }
+ nextPath.assign(next->path.rawData(), next->path.size());
+ nextPath += '\x01'; // Next possible path (\0 is not allowed).
+ }
+
+ const RecordId recordId(rowId);
+ const auto& rowStoreCursor = _validateState->getTraverseRecordStoreCursor();
+ if (const auto record = rowStoreCursor->seekExact(opCtx, recordId); record) {
+ // Shred the document and check each path/value against csi(path, rowId).
+ csam->getKeyGen().visitCellsForInsert(
+ record->data.toBson(),
+ [&](PathView path, const column_keygen::UnencodedCellView& cell) {
+ _cellBuilder.reset();
+ column_keygen::writeEncodedCell(cell, &_cellBuilder);
+ const auto rowCell =
+ FullCellView{path, rowId, CellView(_cellBuilder.buf(), _cellBuilder.len())};
+
+ const auto csiCell = csiCursor->seekExact(path, rowId);
+ if (!csiCell) {
+ // The row store has an entry that the index doesn't.
+ _missingIndexEntries.emplace_back(rowCell);
+ results->missingIndexEntries.push_back(
+ _generateInfo(csam->indexName(), recordId, path, rowId));
+ } else if (csiCell->value != rowCell.value) {
+ // The row store and index values diverge.
+ _extraIndexEntries.emplace_back(csiCell.get());
+ _missingIndexEntries.emplace_back(rowCell);
+ results->missingIndexEntries.push_back(_generateInfo(
+ csam->indexName(), recordId, path, rowId, csiCell->value));
+ results->extraIndexEntries.push_back(
+ _generateInfo(csam->indexName(), recordId, path, rowId, rowCell.value));
+ }
+ if (paths.count(rowCell.path) == 1) {
+ paths.erase(rowCell.path);
+ } else {
+ paths[rowCell.path]--;
+ }
+ });
+ }
+
+ // Extra paths in index that don't exist in the row-store.
+ for (const auto& kv : paths) {
+ for (int i = 0; i < kv.second; i++) {
+ _extraIndexEntries.emplace_back(kv.first, rowId, ""_sd);
+ results->extraIndexEntries.push_back(
+ _generateInfo(csam->indexName(), recordId, kv.first, rowId));
+ }
+ }
+ }
+
+ if (!_missingIndexEntries.empty() || !_extraIndexEntries.empty()) {
+ StringBuilder ss;
+ ss << "Index with name '" << csam->indexName() << "' has inconsistencies.";
+ results->errors.push_back(ss.str());
+ results->indexResultsMap.at(csam->indexName()).valid = false;
+ }
+ if (!_missingIndexEntries.empty()) {
+ StringBuilder ss;
+ ss << "Detected " << _missingIndexEntries.size() << " missing index entries.";
+ results->warnings.push_back(ss.str());
+ results->valid = false;
+ }
+ if (!_extraIndexEntries.empty()) {
+ StringBuilder ss;
+ ss << "Detected " << _extraIndexEntries.size() << " extra index entries.";
+ results->warnings.push_back(ss.str());
+ results->valid = false;
+ }
+}
+
+void ColumnIndexConsistency::addIndexEntryErrors(OperationContext* opCtx,
+ ValidateResults* results) {
+ int numColumnStoreIndexes = 0;
+ for (const auto& index : _validateState->getIndexes()) {
+ const IndexDescriptor* descriptor = index->descriptor();
+ if (descriptor->getAccessMethodName() == IndexNames::COLUMN) {
+ ++numColumnStoreIndexes;
+ uassert(7106138,
+ "The current implementation only supports a single column-store index.",
+ numColumnStoreIndexes <= 1);
+ _addIndexEntryErrors(opCtx, index.get(), results);
+ }
+ }
+}
+
+void ColumnIndexConsistency::repairIndexEntries(OperationContext* opCtx, ValidateResults* results) {
+ // TODO SERVER-71560 Repair should be implemented.
+}
+
+void ColumnIndexConsistency::validateIndexKeyCount(OperationContext* opCtx,
+ const IndexCatalogEntry* index,
+ long long* numRecords,
+ IndexValidateResults& results) {
+ // Nothing to do.
+}
+
+// Generate info for the validate() command output.
+BSONObj ColumnIndexConsistency::_generateInfo(const std::string& indexName,
+ const RecordId& recordId,
+ const PathView path,
+ RowId rowId,
+ StringData value) {
+ BSONObjBuilder infoBuilder;
+
+ infoBuilder.append("indexName", indexName);
+ recordId.serializeToken("recordId", &infoBuilder);
+ if (rowId != ColumnStore::kNullRowId) {
+ infoBuilder.append("rowId", rowId);
+ }
+ if (!path.empty()) {
+ infoBuilder.append("indexPath", path);
+ }
+ if (!value.empty()) {
+ infoBuilder.append("value", value);
+ }
+ return infoBuilder.obj();
+}
+
+void ColumnIndexConsistency::addDocEntry(const FullCellView& val) {
+ _tabulateEntry(val, 1);
+ _numDocs++;
+}
+
+void ColumnIndexConsistency::addIndexEntry(const FullCellView& val) {
+ _tabulateEntry(val, -1);
+ _numIndexEntries++;
+}
+
+void ColumnIndexConsistency::_updateSuspectList(const FullCellView& cell,
+ ValidateResults* results) {
+ const auto rawHash = _hash(cell);
+ const auto hashLower = rawHash % _indexKeyBuckets.size();
+ const auto hashUpper = (rawHash / _indexKeyBuckets.size()) % _indexKeyBuckets.size();
+
+ if (_indexKeyBuckets[hashLower].indexKeyCount != 0 ||
+ _indexKeyBuckets[hashUpper].indexKeyCount != 0) {
+ _suspects.insert(cell.rid);
+ }
+}
+
+void ColumnIndexConsistency::_tabulateEntry(const FullCellView& cell, int step) {
+ const auto rawHash = _hash(cell);
+ const auto hashLower = rawHash % _indexKeyBuckets.size();
+ const auto hashUpper = (rawHash / _indexKeyBuckets.size()) % _indexKeyBuckets.size();
+ auto& lower = _indexKeyBuckets[hashLower];
+ auto& upper = _indexKeyBuckets[hashUpper];
+ const auto cellSz = cell.path.size() + sizeof(cell.rid) + cell.value.size();
+
+ lower.indexKeyCount += step;
+ lower.bucketSizeBytes += cellSz;
+ upper.indexKeyCount += step;
+ upper.bucketSizeBytes += cellSz;
+}
+} // namespace mongo
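
Note that _tabulateEntry and _updateSuspectList above derive two bucket positions from a single hash and treat a row as suspect only if either bucket is non-zero: a counting-Bloom-filter-style test that admits false positives (harmless extra re-checking in phase two) but no false negatives. A standalone sketch of that mechanic, with hypothetical names:

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

class TwoBucketFilter {
public:
    explicit TwoBucketFilter(size_t numBuckets) : _buckets(numBuckets, 0) {}

    // step = +1 for document cells, -1 for index cells (phase one).
    void tabulate(uint64_t rawHash, int64_t step) {
        _buckets[_lower(rawHash)] += step;
        _buckets[_upper(rawHash)] += step;
    }

    // Phase two: a cell is only worth re-checking if either of its
    // buckets still carries an imbalance.
    bool isSuspect(uint64_t rawHash) const {
        return _buckets[_lower(rawHash)] != 0 || _buckets[_upper(rawHash)] != 0;
    }

private:
    size_t _lower(uint64_t h) const {
        return h % _buckets.size();
    }
    size_t _upper(uint64_t h) const {
        return (h / _buckets.size()) % _buckets.size();
    }

    std::vector<int64_t> _buckets;
};
```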
diff --git a/src/mongo/db/catalog/column_index_consistency.h b/src/mongo/db/catalog/column_index_consistency.h
new file mode 100644
index 00000000000..a503e8c7154
--- /dev/null
+++ b/src/mongo/db/catalog/column_index_consistency.h
@@ -0,0 +1,201 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include <absl/hash/hash.h>
+#include <algorithm>
+#include <vector>
+
+#include "mongo/db/catalog/index_consistency.h"
+#include "mongo/db/catalog/throttle_cursor.h"
+#include "mongo/db/catalog/validate_state.h"
+#include "mongo/db/index/columns_access_method.h"
+#include "mongo/db/storage/column_store.h"
+#include "mongo/stdx/unordered_set.h"
+#include "mongo/util/progress_meter.h"
+
+namespace mongo {
+
+/**
+ * The ColumnIndexConsistency class is used to keep track of the index consistency in column-store
+ * indexes. It does this by using the index keys from index entries and index keys generated from
+ * the document to ensure there is a one-to-one mapping for each key.
+ * In addition, an IndexObserver class can be hooked into the IndexAccessMethod to inform this class
+ * about changes to the indexes during a validation and compensate for them.
+ */
+class ColumnIndexConsistency final : protected IndexConsistency {
+
+public:
+ ColumnIndexConsistency(OperationContext* opCtx,
+ CollectionValidation::ValidateState* validateState,
+ const size_t numHashBuckets = kNumHashBuckets)
+ : IndexConsistency(opCtx, validateState, numHashBuckets) {}
+
+ void setSecondPhase() {
+ IndexConsistency::setSecondPhase();
+ }
+
+ /**
+ * Traverses the column-store index via 'cursor' and accumulates the traversal results.
+ */
+ int64_t traverseIndex(OperationContext* opCtx,
+ const IndexCatalogEntry* index,
+ ProgressMeterHolder& _progress,
+ ValidateResults* results);
+
+ /**
+ * Traverses all paths in a single record from the row-store via the given {'recordId','record'}
+ * pair and accumulates the traversal results.
+ */
+ void traverseRecord(OperationContext* opCtx,
+ const CollectionPtr& coll,
+ const IndexCatalogEntry* index,
+ const RecordId& recordId,
+ const BSONObj& recordBson,
+ ValidateResults* results);
+
+ /**
+     * Returns true if any bucket in `_indexKeyBuckets` has a non-zero count, and false
+     * otherwise.
+ */
+ bool haveEntryMismatch() const {
+ return std::any_of(_indexKeyBuckets.cbegin(), _indexKeyBuckets.cend(), [](auto b) {
+ return b.indexKeyCount != 0;
+ });
+ }
+
+ /**
+ * If repair mode enabled, try inserting _missingIndexEntries into indexes.
+ */
+ void repairIndexEntries(OperationContext* opCtx, ValidateResults* results);
+
+ /**
+ * Records the errors gathered from the second phase of index validation into the provided
+ * ValidateResultsMap and ValidateResults.
+ */
+ void addIndexEntryErrors(OperationContext* opCtx, ValidateResults* results);
+
+ /**
+ * Sets up this IndexConsistency object to limit memory usage in the second phase of index
+ * validation. Returns whether the memory limit is sufficient to report at least one index entry
+ * inconsistency and continue with the second phase of validation.
+ */
+ bool limitMemoryUsageForSecondPhase(ValidateResults* result) {
+ return true;
+ }
+
+ void validateIndexKeyCount(OperationContext* opCtx,
+ const IndexCatalogEntry* index,
+ long long* numRecords,
+ IndexValidateResults& results);
+
+ uint64_t getTotalIndexKeys() {
+ return _numIndexEntries;
+ }
+
+ //////////////////////////////////////////////////////////
+ // Beginning of methods being public for testing purposes
+ //////////////////////////////////////////////////////////
+ int64_t getNumDocs() const {
+ return _numDocs;
+ }
+
+ uint32_t getBucketCount(size_t bucketNum) const {
+ invariant(bucketNum < _indexKeyBuckets.size());
+ return _indexKeyBuckets[bucketNum].indexKeyCount;
+ }
+
+ uint32_t getBucketSizeBytes(size_t bucketNum) const {
+ invariant(bucketNum < _indexKeyBuckets.size());
+ return _indexKeyBuckets[bucketNum].bucketSizeBytes;
+ }
+
+ /**
+ * Accumulates the info about a cell extracted from a shredded row-store record.
+ */
+ void addDocEntry(const FullCellView& val);
+
+ /**
+ * Accumulates the info about a column-store index entry cell.
+ */
+ void addIndexEntry(const FullCellView& val);
+
+ static uint32_t _hash(const FullCellView& cell, uint64_t seed = 0) {
+ MONGO_STATIC_ASSERT_MSG(std::is_same<decltype(cell.rid), int64_t>::value,
+ "'rid' should be an int64 (i.e., it's not a class) so that we're "
+ "not hashing based on the compiler chosen layout for a struct.");
+
+ MONGO_STATIC_ASSERT_MSG(sizeof(uint64_t) <= sizeof(size_t),
+ "Unable to safely store a uint64_t value in a size_t variable");
+
+ size_t hash = static_cast<size_t>(seed);
+ boost::hash_combine(hash,
+ absl::hash_internal::CityHash64(cell.path.rawData(), cell.path.size()));
+ boost::hash_combine(hash,
+ absl::hash_internal::CityHash64(
+ reinterpret_cast<const char*>(&cell.rid), sizeof(cell.rid)));
+ boost::hash_combine(
+ hash, absl::hash_internal::CityHash64(cell.value.rawData(), cell.value.size()));
+ return hash;
+ }
+ //////////////////////////////////////////////////////////
+ // End of methods being public for testing purposes
+ //////////////////////////////////////////////////////////
+
+private:
+ ColumnIndexConsistency() = delete;
+
+ /**
+ * Pinpoints the errors from the accumulated information from traversal of both row-store and
+ * column-store index and adds these errors to 'results'.
+ */
+ void _addIndexEntryErrors(OperationContext* opCtx,
+ const IndexCatalogEntry* index,
+ ValidateResults* results);
+
+ void _tabulateEntry(const FullCellView& cell, int step);
+
+ BSONObj _generateInfo(const std::string& indexName,
+ const RecordId& recordId,
+ PathView path,
+ RowId rowId,
+ StringData value = "");
+
+ void _updateSuspectList(const FullCellView& cell, ValidateResults* results);
+
+ std::vector<FullCellValue> _missingIndexEntries;
+ std::vector<FullCellValue> _extraIndexEntries;
+
+ stdx::unordered_set<RowId> _suspects;
+ int64_t _numDocs = 0;
+ int64_t _numIndexEntries = 0;
+ BufBuilder _cellBuilder;
+};
+} // namespace mongo
diff --git a/src/mongo/db/catalog/column_index_consistency_test.cpp b/src/mongo/db/catalog/column_index_consistency_test.cpp
new file mode 100644
index 00000000000..0199b9627e1
--- /dev/null
+++ b/src/mongo/db/catalog/column_index_consistency_test.cpp
@@ -0,0 +1,261 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/catalog/catalog_test_fixture.h"
+#include "mongo/db/catalog/column_index_consistency.h"
+#include "mongo/idl/server_parameter_test_util.h"
+#include "mongo/stdx/unordered_set.h"
+#include "mongo/unittest/unittest.h"
+
+namespace mongo {
+
+namespace {
+
+const NamespaceString kNss = NamespaceString("test.t");
+
+class ColumnIndexConsistencyTest : public CatalogTestFixture {
+protected:
+ ColumnIndexConsistencyTest(Options options = {}) : CatalogTestFixture(std::move(options)) {}
+
+ ColumnIndexConsistency createColumnStoreConsistency(
+ const size_t numHashBuckets = IndexConsistency::kNumHashBuckets) {
+ return ColumnIndexConsistency(operationContext(), _validateState.get(), numHashBuckets);
+ }
+
+ void setUp() override {
+ CatalogTestFixture::setUp();
+
+ auto opCtx = operationContext();
+
+ // Create collection kNss for unit tests to use. It will possess a default _id index.
+ const CollectionOptions defaultCollectionOptions;
+ ASSERT_OK(storageInterface()->createCollection(opCtx, kNss, defaultCollectionOptions));
+
+ _validateState = std::make_unique<CollectionValidation::ValidateState>(
+ opCtx,
+ kNss,
+ CollectionValidation::ValidateMode::kForeground,
+ CollectionValidation::RepairMode::kNone);
+    }
+
+ void tearDown() override {
+ _validateState.reset();
+ CatalogTestFixture::tearDown();
+ }
+
+ std::unique_ptr<CollectionValidation::ValidateState> _validateState;
+};
+
+const std::vector<FullCellView> kEntries{
+ {"path1"_sd, 1, "value1"_sd},
+ {"path1"_sd, 2, "value1"_sd},
+ {"path1"_sd, 3, "value1"_sd},
+
+ {"path1"_sd, 4, "value1"_sd},
+ {"path2"_sd, 4, "value1"_sd},
+ {"path3"_sd, 4, "value1"_sd},
+
+ {"path4"_sd, 5, "value1"_sd},
+ {"path4"_sd, 5, "value2"_sd},
+ {"path4"_sd, 5, "value3"_sd},
+
+ {"path4"_sd, 0x1000'0000'0000'0001, "value3"_sd},
+ {"path4"_sd, 0x2000'0000'0000'0001, "value3"_sd},
+ {"path4"_sd, 0x3000'0000'0000'0001, "value3"_sd},
+
+ {"path44"_sd, 5, "value3"_sd},
+ {"path4"_sd, 5, "value33"_sd},
+
+ {"path1"_sd, 1, "1value1"_sd},
+ {"path1"_sd, 2, "1value1"_sd},
+ {"path1"_sd, 3, "1value1"_sd},
+
+ {"path1"_sd, 4, "1value1"_sd},
+ {"path2"_sd, 4, "1value1"_sd},
+ {"path3"_sd, 4, "1value1"_sd},
+
+ {"path4"_sd, 5, "1value1"_sd},
+ {"path4"_sd, 5, "1value2"_sd},
+ {"path4"_sd, 5, "1value3"_sd},
+
+ {"path4"_sd, 0x1000'0000'0000'0001, "value32"_sd},
+ {"path4"_sd, 0x2000'0000'0000'0001, "value33"_sd},
+ {"path4"_sd, 0x3000'0000'0000'0001, "value34"_sd},
+
+ {"path44"_sd, 5, "4value3"_sd},
+ {"path4"_sd, 5, "4value33"_sd},
+
+ // A couple duplicates
+ {"path44"_sd, 5, "4value3"_sd},
+ {"path4"_sd, 5, "4value33"_sd},
+
+ {"path.some.path"_sd, 5, "a dog likes to chew"_sd},
+ {"path.some.path"_sd, 6, "a dog likes to bite"_sd},
+ {"path.some.path"_sd, 7, "a dog likes to sleep"_sd},
+ {"path.some.path"_sd, 8, "a dog likes to eat"_sd},
+ {"path.some.path"_sd, 9, "a dog likes to walk"_sd},
+ {"path.some.path"_sd, 10, "a dog likes to play"_sd},
+ {"path.some.path"_sd, 11, "a dog likes to drool"_sd},
+ {"path.some.path"_sd, 12, "a dog likes to sniff"_sd},
+ {"path.some.path"_sd, 13, "a dog likes to snuggle"_sd},
+ {"path.some.path"_sd, 14, "a dog likes to go to the park with other dogs"_sd},
+
+ {"path.some.otherpath"_sd, 1, "life is understood backwards but lived forwards"_sd},
+ {"path.some.otherpath"_sd, 2, "the key is not spending time but investing it"_sd},
+ {"path.some.otherpath"_sd, 3, "if we take care of the moments, the years"_sd},
+ {"path.some.otherpath"_sd, 4, "time is an illusion"_sd},
+ {"path.some.otherpath"_sd, 5, "life and times"_sd},
+ {"path.some.otherpath"_sd, 667, "how many more times"_sd},
+
+ {"still.some.otherpath"_sd, 667, "islands in the stream"_sd},
+ {"still.some.otherpath"_sd, 668, "the gambler"_sd},
+ {"still.some.otherpath"_sd, 669, "coward of the county"_sd},
+ {"still.some.otherpath"_sd, 662, "lady"_sd},
+ {"still.some.otherpath"_sd, 661, "we've got tonight"_sd},
+ {"still.some.otherpath"_sd, 660, "through the years"_sd},
+};
+
+TEST_F(ColumnIndexConsistencyTest, VerifyHashFunction) {
+ stdx::unordered_set<int64_t> hashes;
+ for (const auto& v : kEntries) {
+ hashes.insert(ColumnIndexConsistency::_hash(v));
+ }
+ ASSERT_EQ(hashes.size(), 50u);
+}
+
+TEST_F(ColumnIndexConsistencyTest, VerifyEntriesEqual) {
+ ColumnIndexConsistency cic = createColumnStoreConsistency();
+
+ for (const auto& v : kEntries) {
+ cic.addDocEntry(v);
+ }
+ for (const auto& v : kEntries) {
+ cic.addIndexEntry(v);
+ }
+ ASSERT_FALSE(cic.haveEntryMismatch());
+ ASSERT_EQ(cic.getNumDocs(), cic.getTotalIndexKeys());
+}
+
+TEST_F(ColumnIndexConsistencyTest, VerifyEntriesNotEqual) {
+ ColumnIndexConsistency cic = createColumnStoreConsistency();
+
+ for (const auto& v : kEntries) {
+ cic.addDocEntry(v);
+ }
+ cic.addDocEntry({"path444"_sd, 4, "value1"_sd});
+ for (const auto& v : kEntries) {
+ cic.addIndexEntry(v);
+ }
+ cic.addIndexEntry({"path444"_sd, 4, "value10"_sd});
+ ASSERT_TRUE(cic.haveEntryMismatch());
+ ASSERT_EQ(cic.getNumDocs(), cic.getTotalIndexKeys());
+}
+
+TEST_F(ColumnIndexConsistencyTest, VerifyUnorderedEntriesEqual) {
+ ColumnIndexConsistency cic = createColumnStoreConsistency();
+
+ for (int i = 0; i < 10000; i++) {
+ cic.addDocEntry({"foo"_sd, i, "bar"_sd});
+ }
+
+ for (int i = 9999; i >= 0; i--) {
+ cic.addIndexEntry({"foo"_sd, i, "bar"_sd});
+ }
+ ASSERT_EQ(cic.getNumDocs(), 10000u);
+ ASSERT_EQ(cic.getNumDocs(), cic.getTotalIndexKeys());
+ ASSERT_FALSE(cic.haveEntryMismatch());
+
+ cic.addDocEntry({"foo"_sd, 5311, "bar"_sd});
+ ASSERT_NE(cic.getNumDocs(), cic.getTotalIndexKeys());
+ ASSERT_TRUE(cic.haveEntryMismatch());
+
+ cic.addIndexEntry({"foo"_sd, 5311, "bar"_sd});
+ ASSERT_EQ(cic.getNumDocs(), cic.getTotalIndexKeys());
+ ASSERT_FALSE(cic.haveEntryMismatch());
+
+ for (const auto& v : kEntries) {
+ cic.addDocEntry(v);
+ }
+ for (auto it = kEntries.crbegin(); it != kEntries.crend(); it++) {
+ cic.addIndexEntry(*it);
+ }
+ ASSERT_FALSE(cic.haveEntryMismatch());
+ ASSERT_EQ(cic.getNumDocs(), cic.getTotalIndexKeys());
+}
+
+TEST_F(ColumnIndexConsistencyTest, VerifyEvenBucketDistribution) {
+ const size_t numBuckets = 1000;
+ ColumnIndexConsistency cic = createColumnStoreConsistency(numBuckets);
+
+ for (int i = 0; i < 10000; i++) {
+ cic.addDocEntry({"foo"_sd, i, "bar"_sd});
+ }
+
+ uint32_t maxValue = 0;
+ for (size_t i = 0; i < numBuckets; i++) {
+ maxValue = std::max(cic.getBucketCount(i), maxValue);
+ }
+ ASSERT_LT(maxValue, uint32_t(40)); // perfect would be 20, so 2x that is our made-up threshold
+}
+
+TEST_F(ColumnIndexConsistencyTest, VerifyEvenBucketDistributionFewerInputs) {
+ // fewer, more varied inputs
+ const size_t numBuckets = 5;
+ ColumnIndexConsistency cic = createColumnStoreConsistency(numBuckets);
+ for (const auto& v : kEntries) {
+ cic.addDocEntry(v);
+ }
+ uint32_t maxValue = 0;
+ for (size_t i = 0; i < numBuckets; i++) {
+ maxValue = std::max(cic.getBucketCount(i), maxValue);
+ }
+ ASSERT_LT(maxValue, uint32_t(40)); // perfect would be 20, 2x that is our made-up threshold
+}
+
+TEST_F(ColumnIndexConsistencyTest, VerifySizeCounters) {
+ const size_t numBuckets = 5;
+ ColumnIndexConsistency cic = createColumnStoreConsistency(numBuckets);
+
+ cic.addDocEntry({""_sd, 1, "b"_sd}); // 9 bytes
+ cic.addDocEntry({"f"_sd, 0, ""_sd}); // 9 bytes
+ cic.addDocEntry({"f"_sd, 1, "b"_sd}); // 10 bytes
+
+ cic.addIndexEntry({""_sd, 1, "b"_sd});
+ cic.addIndexEntry({"f"_sd, 0, ""_sd});
+ cic.addIndexEntry({"f"_sd, 1, "b"_sd});
+
+ size_t totalSize = 0;
+ for (size_t i = 0; i < numBuckets; i++) {
+ totalSize += cic.getBucketSizeBytes(i);
+ }
+ ASSERT_EQ(totalSize, 112u); // 28 * 2 (hashed twice) * 2 (hash doc + index) = 112
+}
+
+} // namespace
+} // namespace mongo
diff --git a/src/mongo/db/catalog/database_impl.cpp b/src/mongo/db/catalog/database_impl.cpp
index 54d9b3fd930..47416142ee4 100644
--- a/src/mongo/db/catalog/database_impl.cpp
+++ b/src/mongo/db/catalog/database_impl.cpp
@@ -57,6 +57,7 @@
#include "mongo/db/query/query_knobs_gen.h"
#include "mongo/db/repl/drop_pending_collection_reaper.h"
#include "mongo/db/repl/oplog.h"
+#include "mongo/db/repl/replication_consistency_markers_impl.h"
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/s/collection_sharding_state.h"
#include "mongo/db/s/database_sharding_state.h"
@@ -749,6 +750,21 @@ Collection* DatabaseImpl::createVirtualCollection(OperationContext* opCtx,
vopts);
}
+/**
+ * Some system collections (namely, 'config.transactions' and
+ * 'local.replset.oplogTruncateAfterPoint') are special internal collections that are written to
+ * without updating indexes, so there's no value in creating an index on them.
+ *
+ * @return true if modifications to the collection's data lead to updates of the indexes defined
+ * on it.
+ */
+bool doesCollectionModificationsUpdateIndexes(StringData collName) {
+ // TODO SERVER-72496: investigate whether 'config.transactions' should remain here or not.
+ return collName != "config.transactions" &&
+ collName !=
+ repl::ReplicationConsistencyMarkersImpl::kDefaultOplogTruncateAfterPointNamespace;
+}
+
Collection* DatabaseImpl::_createCollection(
OperationContext* opCtx,
const NamespaceString& nss,
@@ -856,7 +872,8 @@ Collection* DatabaseImpl::_createCollection(
opCtx,
collection,
!idIndex.isEmpty() ? idIndex : ic->getDefaultIdIndexSpec(collection)));
- createColumnIndex = createColumnIndexOnAllCollections.shouldFail();
+ createColumnIndex = createColumnIndexOnAllCollections.shouldFail() &&
+ doesCollectionModificationsUpdateIndexes(nss.ns());
} else {
// autoIndexId: false is only allowed on unreplicated collections.
uassert(50001,
diff --git a/src/mongo/db/catalog/index_consistency.cpp b/src/mongo/db/catalog/index_consistency.cpp
index 40dec646b08..f16bb7ebfa3 100644
--- a/src/mongo/db/catalog/index_consistency.cpp
+++ b/src/mongo/db/catalog/index_consistency.cpp
@@ -43,17 +43,22 @@
#include "mongo/db/index/index_descriptor.h"
#include "mongo/db/multi_key_path_tracker.h"
#include "mongo/db/record_id_helpers.h"
+#include "mongo/db/storage/execution_context.h"
#include "mongo/logv2/log.h"
#include "mongo/util/string_map.h"
+#include "mongo/util/testing_proctor.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kStorage
namespace mongo {
+const long long IndexConsistency::kInterruptIntervalNumRecords = 4096;
+const size_t IndexConsistency::kNumHashBuckets = 1U << 16;
+
namespace {
-const size_t kNumHashBuckets = 1U << 16;
+MONGO_FAIL_POINT_DEFINE(crashOnMultikeyValidateFailure);
StringSet::hasher hash;
@@ -95,18 +100,31 @@ IndexEntryInfo::IndexEntryInfo(const IndexInfo& indexInfo,
keyString(entryKeyString) {}
IndexConsistency::IndexConsistency(OperationContext* opCtx,
- CollectionValidation::ValidateState* validateState)
+ CollectionValidation::ValidateState* validateState,
+ const size_t numHashBuckets)
: _validateState(validateState), _firstPhase(true) {
- _indexKeyBuckets.resize(kNumHashBuckets);
+ _indexKeyBuckets.resize(numHashBuckets);
+}
+
+void IndexConsistency::setSecondPhase() {
+ invariant(_firstPhase);
+ _firstPhase = false;
+}
+KeyStringIndexConsistency::KeyStringIndexConsistency(
+ OperationContext* opCtx,
+ CollectionValidation::ValidateState* validateState,
+ const size_t numHashBuckets)
+ : IndexConsistency(opCtx, validateState, numHashBuckets) {
for (const auto& index : _validateState->getIndexes()) {
- const IndexDescriptor* descriptor = index->descriptor();
+ const auto descriptor = index->descriptor();
IndexAccessMethod* accessMethod = const_cast<IndexAccessMethod*>(index->accessMethod());
_indexesInfo.emplace(descriptor->indexName(), IndexInfo(descriptor, accessMethod));
}
}
-void IndexConsistency::addMultikeyMetadataPath(const KeyString::Value& ks, IndexInfo* indexInfo) {
+void KeyStringIndexConsistency::addMultikeyMetadataPath(const KeyString::Value& ks,
+ IndexInfo* indexInfo) {
auto hash = _hashKeyString(ks, indexInfo->indexNameHash);
if (MONGO_unlikely(_validateState->extraLoggingForTest())) {
LOGV2(6208500,
@@ -117,8 +135,8 @@ void IndexConsistency::addMultikeyMetadataPath(const KeyString::Value& ks, Index
indexInfo->hashedMultikeyMetadataPaths.emplace(hash);
}
-void IndexConsistency::removeMultikeyMetadataPath(const KeyString::Value& ks,
- IndexInfo* indexInfo) {
+void KeyStringIndexConsistency::removeMultikeyMetadataPath(const KeyString::Value& ks,
+ IndexInfo* indexInfo) {
auto hash = _hashKeyString(ks, indexInfo->indexNameHash);
if (MONGO_unlikely(_validateState->extraLoggingForTest())) {
LOGV2(6208501,
@@ -129,23 +147,18 @@ void IndexConsistency::removeMultikeyMetadataPath(const KeyString::Value& ks,
indexInfo->hashedMultikeyMetadataPaths.erase(hash);
}
-size_t IndexConsistency::getMultikeyMetadataPathCount(IndexInfo* indexInfo) {
+size_t KeyStringIndexConsistency::getMultikeyMetadataPathCount(IndexInfo* indexInfo) {
return indexInfo->hashedMultikeyMetadataPaths.size();
}
-bool IndexConsistency::haveEntryMismatch() const {
+bool KeyStringIndexConsistency::haveEntryMismatch() const {
return std::any_of(_indexKeyBuckets.begin(),
_indexKeyBuckets.end(),
[](const IndexKeyBucket& bucket) -> bool { return bucket.indexKeyCount; });
}
-void IndexConsistency::setSecondPhase() {
- invariant(_firstPhase);
- _firstPhase = false;
-}
-
-void IndexConsistency::repairMissingIndexEntries(OperationContext* opCtx,
- ValidateResults* results) {
+void KeyStringIndexConsistency::repairIndexEntries(OperationContext* opCtx,
+ ValidateResults* results) {
invariant(_validateState->getIndexes().size() > 0);
std::shared_ptr<const IndexCatalogEntry> index = _validateState->getIndexes().front();
for (auto it = _missingIndexEntries.begin(); it != _missingIndexEntries.end();) {
@@ -194,7 +207,8 @@ void IndexConsistency::repairMissingIndexEntries(OperationContext* opCtx,
}
}
-void IndexConsistency::addIndexEntryErrors(ValidateResults* results) {
+void KeyStringIndexConsistency::addIndexEntryErrors(OperationContext* opCtx,
+ ValidateResults* results) {
invariant(!_firstPhase);
// We'll report up to 1MB for extra index entry errors and missing index entry errors.
@@ -290,8 +304,8 @@ void IndexConsistency::addIndexEntryErrors(ValidateResults* results) {
}
}
-void IndexConsistency::addDocumentMultikeyPaths(IndexInfo* indexInfo,
- const MultikeyPaths& newPaths) {
+void KeyStringIndexConsistency::addDocumentMultikeyPaths(IndexInfo* indexInfo,
+ const MultikeyPaths& newPaths) {
invariant(newPaths.size());
if (indexInfo->docMultikeyPaths.size()) {
MultikeyPathTracker::mergeMultikeyPaths(&indexInfo->docMultikeyPaths, newPaths);
@@ -301,10 +315,10 @@ void IndexConsistency::addDocumentMultikeyPaths(IndexInfo* indexInfo,
}
}
-void IndexConsistency::addDocKey(OperationContext* opCtx,
- const KeyString::Value& ks,
- IndexInfo* indexInfo,
- const RecordId& recordId) {
+void KeyStringIndexConsistency::addDocKey(OperationContext* opCtx,
+ const KeyString::Value& ks,
+ IndexInfo* indexInfo,
+ const RecordId& recordId) {
auto rawHash = ks.hash(indexInfo->indexNameHash);
auto hashLower = rawHash % kNumHashBuckets;
auto hashUpper = (rawHash / kNumHashBuckets) % kNumHashBuckets;
@@ -358,11 +372,11 @@ void IndexConsistency::addDocKey(OperationContext* opCtx,
}
}
-void IndexConsistency::addIndexKey(OperationContext* opCtx,
- const KeyString::Value& ks,
- IndexInfo* indexInfo,
- const RecordId& recordId,
- ValidateResults* results) {
+void KeyStringIndexConsistency::addIndexKey(OperationContext* opCtx,
+ const KeyString::Value& ks,
+ IndexInfo* indexInfo,
+ const RecordId& recordId,
+ ValidateResults* results) {
auto rawHash = ks.hash(indexInfo->indexNameHash);
auto hashLower = rawHash % kNumHashBuckets;
auto hashUpper = (rawHash / kNumHashBuckets) % kNumHashBuckets;
@@ -442,7 +456,7 @@ void IndexConsistency::addIndexKey(OperationContext* opCtx,
}
}
-bool IndexConsistency::limitMemoryUsageForSecondPhase(ValidateResults* result) {
+bool KeyStringIndexConsistency::limitMemoryUsageForSecondPhase(ValidateResults* result) {
invariant(!_firstPhase);
const uint32_t maxMemoryUsageBytes = maxValidateMemoryUsageMB.load() * 1024 * 1024;
@@ -508,11 +522,436 @@ bool IndexConsistency::limitMemoryUsageForSecondPhase(ValidateResults* result) {
return true;
}
-BSONObj IndexConsistency::_generateInfo(const std::string& indexName,
- const BSONObj& keyPattern,
- const RecordId& recordId,
- const BSONObj& indexKey,
- const BSONObj& idKey) {
+void KeyStringIndexConsistency::validateIndexKeyCount(OperationContext* opCtx,
+ const IndexCatalogEntry* index,
+ long long* numRecords,
+ IndexValidateResults& results) {
+ // Fetch the total number of index entries we previously found traversing the index.
+ const IndexDescriptor* desc = index->descriptor();
+ const std::string indexName = desc->indexName();
+ IndexInfo* indexInfo = &this->getIndexInfo(indexName);
+ const auto numTotalKeys = indexInfo->numKeys;
+
+    // Update numRecords by subtracting the number of records removed from the record store in
+    // repair mode when validating index consistency.
+ (*numRecords) -= results.keysRemovedFromRecordStore;
+
+ // Do not fail on finding too few index entries compared to collection entries when full:false.
+ bool hasTooFewKeys = false;
+ const bool noErrorOnTooFewKeys = !_validateState->isFullIndexValidation();
+
+ if (desc->isIdIndex() && numTotalKeys != (*numRecords)) {
+ hasTooFewKeys = (numTotalKeys < (*numRecords));
+ const std::string msg = str::stream()
+ << "number of _id index entries (" << numTotalKeys
+ << ") does not match the number of documents in the index (" << (*numRecords) << ")";
+ if (noErrorOnTooFewKeys && (numTotalKeys < (*numRecords))) {
+ results.warnings.push_back(msg);
+ } else {
+ results.errors.push_back(msg);
+ results.valid = false;
+ }
+ }
+
+ // Hashed indexes may never be multikey.
+ if (desc->getAccessMethodName() == IndexNames::HASHED &&
+ index->isMultikey(opCtx, _validateState->getCollection())) {
+ results.errors.push_back(str::stream() << "Hashed index is incorrectly marked multikey: "
+ << desc->indexName());
+ results.valid = false;
+ }
+
+ // Confirm that the number of index entries is not greater than the number of documents in the
+ // collection. This check is only valid for indexes that are not multikey (indexed arrays
+ // produce an index key per array entry) and not $** indexes which can produce index keys for
+ // multiple paths within a single document.
+ if (results.valid && !index->isMultikey(opCtx, _validateState->getCollection()) &&
+ desc->getIndexType() != IndexType::INDEX_WILDCARD && numTotalKeys > (*numRecords)) {
+ const std::string err = str::stream()
+ << "index " << desc->indexName() << " is not multi-key, but has more entries ("
+ << numTotalKeys << ") than documents in the index (" << (*numRecords) << ")";
+ results.errors.push_back(err);
+ results.valid = false;
+ }
+
+ // Ignore any indexes with a special access method. If an access method name is given, the
+ // index may be a full text, geo or special index plugin with different semantics.
+ if (results.valid && !desc->isSparse() && !desc->isPartial() && !desc->isIdIndex() &&
+ desc->getAccessMethodName() == "" && numTotalKeys < (*numRecords)) {
+ hasTooFewKeys = true;
+ const std::string msg = str::stream()
+ << "index " << desc->indexName() << " is not sparse or partial, but has fewer entries ("
+ << numTotalKeys << ") than documents in the index (" << (*numRecords) << ")";
+ if (noErrorOnTooFewKeys) {
+ results.warnings.push_back(msg);
+ } else {
+ results.errors.push_back(msg);
+ results.valid = false;
+ }
+ }
+
+ if (!_validateState->isFullIndexValidation() && hasTooFewKeys) {
+ const std::string warning = str::stream()
+ << "index " << desc->indexName() << " has fewer keys than records."
+ << " Please re-run the validate command with {full: true}";
+ results.warnings.push_back(warning);
+ }
+}
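
To make the branches above concrete with hypothetical numbers: an _id index with numTotalKeys = 97 against *numRecords = 100 yields a warning under a non-full validation (too few keys are tolerated there) but an error with {full: true}; a non-multikey, non-wildcard index with 103 keys over the same 100 records is an error in either mode, since only indexed arrays or $** paths can legitimately produce more keys than documents.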
+
+namespace {
+// Ensures that index entries are in increasing or decreasing order.
+void _validateKeyOrder(OperationContext* opCtx,
+ const IndexCatalogEntry* index,
+ const KeyString::Value& currKey,
+ const KeyString::Value& prevKey,
+ IndexValidateResults* results) {
+ const auto descriptor = index->descriptor();
+ const bool unique = descriptor->unique();
+
+ // KeyStrings will be in strictly increasing order because all keys are sorted and they are in
+ // the format (Key, RID), and all RecordIDs are unique.
+ if (currKey.compare(prevKey) <= 0) {
+ if (results && results->valid) {
+ results->errors.push_back(str::stream()
+ << "index '" << descriptor->indexName()
+ << "' is not in strictly ascending or descending order");
+ }
+ if (results) {
+ results->valid = false;
+ }
+ return;
+ }
+
+ if (unique) {
+ // Unique indexes must not have duplicate keys.
+ const int cmp = currKey.compareWithoutRecordIdLong(prevKey);
+ if (cmp != 0) {
+ return;
+ }
+
+ if (results && results->valid) {
+ const auto bsonKey =
+ KeyString::toBson(currKey, Ordering::make(descriptor->keyPattern()));
+ const auto firstRecordId =
+ KeyString::decodeRecordIdLongAtEnd(prevKey.getBuffer(), prevKey.getSize());
+ const auto secondRecordId =
+ KeyString::decodeRecordIdLongAtEnd(currKey.getBuffer(), currKey.getSize());
+ results->errors.push_back(str::stream() << "Unique index '" << descriptor->indexName()
+ << "' has duplicate key: " << bsonKey
+ << ", first record: " << firstRecordId
+ << ", second record: " << secondRecordId);
+ }
+ if (results) {
+ results->valid = false;
+ }
+ }
+}
+} // namespace
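
A worked reading of the ordering contract above: because each KeyString carries its RecordId suffix, entries for the same key {a: 1} at RecordId(5) and RecordId(9) still compare strictly increasing, so the first check fires only on a byte-level tie or regression; for a unique index, compareWithoutRecordIdLong() then strips that suffix, so the same pair would be reported as a duplicate key naming both records.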
+
+int64_t KeyStringIndexConsistency::traverseIndex(OperationContext* opCtx,
+ const IndexCatalogEntry* index,
+ ProgressMeterHolder& _progress,
+ ValidateResults* results) {
+ const auto descriptor = index->descriptor();
+ const auto indexName = descriptor->indexName();
+ auto& indexResults = results->indexResultsMap[indexName];
+ IndexInfo& indexInfo = this->getIndexInfo(indexName);
+ int64_t numKeys = 0;
+
+ bool isFirstEntry = true;
+
+ const KeyString::Version version =
+ index->accessMethod()->asSortedData()->getSortedDataInterface()->getKeyStringVersion();
+
+ KeyString::Builder firstKeyStringBuilder(
+ version, BSONObj(), indexInfo.ord, KeyString::Discriminator::kExclusiveBefore);
+ const KeyString::Value firstKeyString = firstKeyStringBuilder.getValueCopy();
+ KeyString::Value prevIndexKeyStringValue;
+
+ // Ensure that this index has an open index cursor.
+ const auto indexCursorIt = _validateState->getIndexCursors().find(indexName);
+ invariant(indexCursorIt != _validateState->getIndexCursors().end());
+
+ const std::unique_ptr<SortedDataInterfaceThrottleCursor>& indexCursor = indexCursorIt->second;
+
+ boost::optional<KeyStringEntry> indexEntry;
+ try {
+ indexEntry = indexCursor->seekForKeyString(opCtx, firstKeyString);
+ } catch (const DBException& ex) {
+ if (TestingProctor::instance().isEnabled() && ex.code() != ErrorCodes::WriteConflict) {
+ LOGV2_FATAL(5318400,
+ "Error seeking to first key",
+ "error"_attr = ex.toString(),
+ "index"_attr = indexName,
+ "key"_attr = firstKeyString.toString());
+ }
+ throw;
+ }
+
+ const auto keyFormat =
+ index->accessMethod()->asSortedData()->getSortedDataInterface()->rsKeyFormat();
+ const RecordId kWildcardMultikeyMetadataRecordId = record_id_helpers::reservedIdFor(
+ record_id_helpers::ReservationId::kWildcardMultikeyMetadataId, keyFormat);
+
+ // Warn about unique indexes with keys in old format (without record id).
+ bool foundOldUniqueIndexKeys = false;
+
+ while (indexEntry) {
+ if (!isFirstEntry) {
+ _validateKeyOrder(
+ opCtx, index, indexEntry->keyString, prevIndexKeyStringValue, &indexResults);
+ }
+
+ if (!foundOldUniqueIndexKeys && !descriptor->isIdIndex() && descriptor->unique() &&
+ !indexCursor->isRecordIdAtEndOfKeyString()) {
+ results->warnings.push_back(
+ fmt::format("Unique index {} has one or more keys in the old format (without "
+ "embedded record id). First record: {}",
+ indexInfo.indexName,
+ indexEntry->loc.toString()));
+ foundOldUniqueIndexKeys = true;
+ }
+
+ const bool isMetadataKey = indexEntry->loc == kWildcardMultikeyMetadataRecordId;
+ if (descriptor->getIndexType() == IndexType::INDEX_WILDCARD && isMetadataKey) {
+ this->removeMultikeyMetadataPath(indexEntry->keyString, &indexInfo);
+ } else {
+ try {
+ this->addIndexKey(
+ opCtx, indexEntry->keyString, &indexInfo, indexEntry->loc, results);
+ } catch (const DBException& e) {
+ StringBuilder ss;
+ ss << "Parsing index key for " << indexInfo.indexName << " recId "
+ << indexEntry->loc << " threw exception " << e.toString();
+ results->errors.push_back(ss.str());
+ results->valid = false;
+ }
+ }
+ {
+ stdx::unique_lock<Client> lk(*opCtx->getClient());
+ _progress.get(lk)->hit();
+ }
+ numKeys++;
+ isFirstEntry = false;
+ prevIndexKeyStringValue = indexEntry->keyString;
+
+ if (numKeys % kInterruptIntervalNumRecords == 0) {
+ // Periodically checks for interrupts and yields.
+ opCtx->checkForInterrupt();
+ _validateState->yield(opCtx);
+ }
+
+ try {
+ indexEntry = indexCursor->nextKeyString(opCtx);
+ } catch (const DBException& ex) {
+ if (TestingProctor::instance().isEnabled() && ex.code() != ErrorCodes::WriteConflict) {
+ LOGV2_FATAL(5318401,
+ "Error advancing index cursor",
+ "error"_attr = ex.toString(),
+ "index"_attr = indexName,
+ "prevKey"_attr = prevIndexKeyStringValue.toString());
+ }
+ throw;
+ }
+ }
+
+ if (results && this->getMultikeyMetadataPathCount(&indexInfo) > 0) {
+ results->errors.push_back(str::stream()
+ << "Index '" << descriptor->indexName()
+ << "' has one or more missing multikey metadata index keys");
+ results->valid = false;
+ }
+
+ // Adjust multikey metadata when allowed. These states are all allowed by the design of
+ // multikey. A collection should still be valid without these adjustments.
+ if (_validateState->adjustMultikey()) {
+
+ // If this collection has documents that make this index multikey, then check whether those
+ // multikey paths match the index's metadata.
+ const auto indexPaths = index->getMultikeyPaths(opCtx, _validateState->getCollection());
+ const auto& documentPaths = indexInfo.docMultikeyPaths;
+ if (indexInfo.multikeyDocs && documentPaths != indexPaths) {
+ LOGV2(5367500,
+ "Index's multikey paths do not match those of its documents",
+ "index"_attr = descriptor->indexName(),
+ "indexPaths"_attr = MultikeyPathTracker::dumpMultikeyPaths(indexPaths),
+ "documentPaths"_attr = MultikeyPathTracker::dumpMultikeyPaths(documentPaths));
+
+ // Since we have the correct multikey path information for this index, we can tighten up
+ // its metadata to improve query performance. This may apply in two distinct scenarios:
+ // 1. Collection data has changed such that the current multikey paths on the index
+ // are too permissive and certain document paths are no longer multikey.
+ // 2. This index was built before 3.4, and there is no multikey path information for
+ // the index. We can effectively 'upgrade' the index so that it does not need to be
+ // rebuilt to update this information.
+ writeConflictRetry(opCtx, "updateMultikeyPaths", _validateState->nss().ns(), [&]() {
+ WriteUnitOfWork wuow(opCtx);
+ auto writeableIndex = const_cast<IndexCatalogEntry*>(index);
+ const bool isMultikey = true;
+ writeableIndex->forceSetMultikey(
+ opCtx, _validateState->getCollection(), isMultikey, documentPaths);
+ wuow.commit();
+ });
+
+ if (results) {
+ results->warnings.push_back(str::stream() << "Updated index multikey metadata"
+ << ": " << descriptor->indexName());
+ results->repaired = true;
+ }
+ }
+
+ // If this index does not need to be multikey, then unset the flag.
+ if (index->isMultikey(opCtx, _validateState->getCollection()) && !indexInfo.multikeyDocs) {
+ invariant(!indexInfo.docMultikeyPaths.size());
+
+ LOGV2(5367501,
+ "Index is multikey but there are no multikey documents",
+ "index"_attr = descriptor->indexName());
+
+ // This makes an improvement in the case that no documents make the index multikey and
+ // the flag can be unset entirely. This may be due to a change in the data or historical
+            // multikey bugs that have persisted incorrect multikey information.
+ writeConflictRetry(opCtx, "unsetMultikeyPaths", _validateState->nss().ns(), [&]() {
+ WriteUnitOfWork wuow(opCtx);
+ auto writeableIndex = const_cast<IndexCatalogEntry*>(index);
+ const bool isMultikey = false;
+ writeableIndex->forceSetMultikey(
+ opCtx, _validateState->getCollection(), isMultikey, {});
+ wuow.commit();
+ });
+
+ if (results) {
+ results->warnings.push_back(str::stream() << "Unset index multikey metadata"
+ << ": " << descriptor->indexName());
+ results->repaired = true;
+ }
+ }
+ }
+
+ return numKeys;
+}
+
+void KeyStringIndexConsistency::traverseRecord(OperationContext* opCtx,
+ const CollectionPtr& coll,
+ const IndexCatalogEntry* index,
+ const RecordId& recordId,
+ const BSONObj& recordBson,
+ ValidateResults* results) {
+ const auto iam = index->accessMethod()->asSortedData();
+
+ const auto descriptor = index->descriptor();
+ SharedBufferFragmentBuilder pool(KeyString::HeapBuilder::kHeapAllocatorDefaultBytes);
+ auto& executionCtx = StorageExecutionContext::get(opCtx);
+
+ const auto documentKeySet = executionCtx.keys();
+ const auto multikeyMetadataKeys = executionCtx.multikeyMetadataKeys();
+ const auto documentMultikeyPaths = executionCtx.multikeyPaths();
+
+ iam->getKeys(opCtx,
+ coll,
+ pool,
+ recordBson,
+ InsertDeleteOptions::ConstraintEnforcementMode::kEnforceConstraints,
+ SortedDataIndexAccessMethod::GetKeysContext::kAddingKeys,
+ documentKeySet.get(),
+ multikeyMetadataKeys.get(),
+ documentMultikeyPaths.get(),
+ recordId);
+
+ const bool shouldBeMultikey =
+ iam->shouldMarkIndexAsMultikey(documentKeySet->size(),
+ {multikeyMetadataKeys->begin(), multikeyMetadataKeys->end()},
+ *documentMultikeyPaths);
+
+ if (!index->isMultikey(opCtx, coll) && shouldBeMultikey) {
+ if (_validateState->fixErrors()) {
+ writeConflictRetry(opCtx, "setIndexAsMultikey", coll->ns().ns(), [&] {
+ WriteUnitOfWork wuow(opCtx);
+ coll->getIndexCatalog()->setMultikeyPaths(
+ opCtx, coll, descriptor, *multikeyMetadataKeys, *documentMultikeyPaths);
+ wuow.commit();
+ });
+
+ LOGV2(4614700,
+ "Index set to multikey",
+ "indexName"_attr = descriptor->indexName(),
+ "collection"_attr = coll->ns().ns());
+ results->warnings.push_back(str::stream() << "Index " << descriptor->indexName()
+ << " set to multikey.");
+ results->repaired = true;
+ } else {
+ auto& curRecordResults = (results->indexResultsMap)[descriptor->indexName()];
+ const std::string msg = fmt::format(
+ "Index {} is not multikey but has more than one key in document with "
+ "RecordId({}) and {}",
+ descriptor->indexName(),
+ recordId.toString(),
+ recordBson.getField("_id").toString());
+ curRecordResults.errors.push_back(msg);
+ curRecordResults.valid = false;
+ if (crashOnMultikeyValidateFailure.shouldFail()) {
+ invariant(false, msg);
+ }
+ }
+ }
+
+ if (index->isMultikey(opCtx, coll)) {
+ const MultikeyPaths& indexPaths = index->getMultikeyPaths(opCtx, coll);
+ if (!MultikeyPathTracker::covers(indexPaths, *documentMultikeyPaths.get())) {
+ if (_validateState->fixErrors()) {
+ writeConflictRetry(opCtx, "increaseMultikeyPathCoverage", coll->ns().ns(), [&] {
+ WriteUnitOfWork wuow(opCtx);
+ coll->getIndexCatalog()->setMultikeyPaths(
+ opCtx, coll, descriptor, *multikeyMetadataKeys, *documentMultikeyPaths);
+ wuow.commit();
+ });
+
+ LOGV2(4614701,
+ "Multikey paths updated to cover multikey document",
+ "indexName"_attr = descriptor->indexName(),
+ "collection"_attr = coll->ns().ns());
+ results->warnings.push_back(str::stream() << "Index " << descriptor->indexName()
+ << " multikey paths updated.");
+ results->repaired = true;
+ } else {
+ const std::string msg = fmt::format(
+ "Index {} multikey paths do not cover a document with RecordId({}) and {}",
+ descriptor->indexName(),
+ recordId.toString(),
+ recordBson.getField("_id").toString());
+ auto& curRecordResults = (results->indexResultsMap)[descriptor->indexName()];
+ curRecordResults.errors.push_back(msg);
+ curRecordResults.valid = false;
+ }
+ }
+ }
+
+ IndexInfo& indexInfo = this->getIndexInfo(descriptor->indexName());
+ if (shouldBeMultikey) {
+ indexInfo.multikeyDocs = true;
+ }
+
+ // An empty set of multikey paths indicates that this index does not track path-level
+ // multikey information and we should do no tracking.
+ if (shouldBeMultikey && documentMultikeyPaths->size()) {
+ this->addDocumentMultikeyPaths(&indexInfo, *documentMultikeyPaths);
+ }
+
+ for (const auto& keyString : *multikeyMetadataKeys) {
+ this->addMultikeyMetadataPath(keyString, &indexInfo);
+ }
+
+ for (const auto& keyString : *documentKeySet) {
+ _totalIndexKeys++;
+ this->addDocKey(opCtx, keyString, &indexInfo, recordId);
+ }
+}
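
As a concrete illustration of the repair path above: if a document {a: [1, 2]} generates two keys for an index {a: 1} that is not yet marked multikey, shouldBeMultikey comes back true; when _validateState->fixErrors() is set, validate marks the index multikey inside a WriteUnitOfWork and records a warning plus repaired = true, and otherwise it records a per-index error naming the offending RecordId.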
+
+BSONObj KeyStringIndexConsistency::_generateInfo(const std::string& indexName,
+ const BSONObj& keyPattern,
+ const RecordId& recordId,
+ const BSONObj& indexKey,
+ const BSONObj& idKey) {
// We need to rehydrate the indexKey for improved readability.
// {"": ObjectId(...)} -> {"_id": ObjectId(...)}
@@ -543,8 +982,8 @@ BSONObj IndexConsistency::_generateInfo(const std::string& indexName,
return infoBuilder.obj();
}
-uint32_t IndexConsistency::_hashKeyString(const KeyString::Value& ks,
- uint32_t indexNameHash) const {
+uint32_t KeyStringIndexConsistency::_hashKeyString(const KeyString::Value& ks,
+ const uint32_t indexNameHash) const {
return ks.hash(indexNameHash);
}
} // namespace mongo
diff --git a/src/mongo/db/catalog/index_consistency.h b/src/mongo/db/catalog/index_consistency.h
index ade97d20918..f873e83addf 100644
--- a/src/mongo/db/catalog/index_consistency.h
+++ b/src/mongo/db/catalog/index_consistency.h
@@ -32,6 +32,7 @@
#include "mongo/bson/simple_bsonobj_comparator.h"
#include "mongo/db/catalog/validate_state.h"
#include "mongo/db/storage/key_string.h"
+#include "mongo/util/progress_meter.h"
namespace mongo {
@@ -69,8 +70,7 @@ struct IndexInfo {
};
/**
- * Used by _missingIndexEntries to be able to easily access keyString during
- * repairMissingIndexEntries.
+ * Used by _missingIndexEntries to be able to easily access keyString during repairIndexEntries.
*/
struct IndexEntryInfo {
IndexEntryInfo(const IndexInfo& indexInfo,
@@ -85,90 +85,116 @@ struct IndexEntryInfo {
KeyString::Value keyString;
};
+
/**
- * The IndexConsistency class is used to keep track of the index consistency.
- * It does this by using the index keys from index entries and index keys generated from the
- * document to ensure there is a one-to-one mapping for each key.
- * In addition, an IndexObserver class can be hooked into the IndexAccessMethod to inform
- * this class about changes to the indexes during a validation and compensate for them.
+ * The IndexConsistency class is the base class for the index-consistency sub-classes. Its base
+ * implementation keeps track of index consistency by using the index keys from index entries
+ * and index keys
+ * generated from the document to ensure there is a one-to-one mapping for each key.
*/
-class IndexConsistency final {
+class IndexConsistency {
using IndexInfoMap = std::map<std::string, IndexInfo>;
using IndexKey = std::pair<std::string, std::string>;
public:
- IndexConsistency(OperationContext* opCtx, CollectionValidation::ValidateState* validateState);
+ static const long long kInterruptIntervalNumRecords;
+ static const size_t kNumHashBuckets;
- /**
- * During the first phase of validation, given the document's key KeyString, increment the
- * corresponding `_indexKeyCount` by hashing it.
- * For the second phase of validation, keep track of the document keys that hashed to
- * inconsistent hash buckets during the first phase of validation.
- */
- void addDocKey(OperationContext* opCtx,
- const KeyString::Value& ks,
- IndexInfo* indexInfo,
- const RecordId& recordId);
+ IndexConsistency(OperationContext* opCtx,
+ CollectionValidation::ValidateState* validateState,
+ size_t numHashBuckets = kNumHashBuckets);
/**
- * During the first phase of validation, given the index entry's KeyString, decrement the
- * corresponding `_indexKeyCount` by hashing it.
- * For the second phase of validation, try to match the index entry keys that hashed to
- * inconsistent hash buckets during the first phase of validation to document keys.
+ * Informs the IndexConsistency object that we're advancing to the second phase of
+ * index validation.
*/
- void addIndexKey(OperationContext* opCtx,
- const KeyString::Value& ks,
- IndexInfo* indexInfo,
- const RecordId& recordId,
- ValidateResults* results);
+ void setSecondPhase();
- /**
- * During the first phase of validation, tracks the multikey paths for every observed document.
- */
- void addDocumentMultikeyPaths(IndexInfo* indexInfo, const MultikeyPaths& multikeyPaths);
+ virtual ~IndexConsistency() = default;
- /**
- * To validate $** multikey metadata paths, we first scan the collection and add a hash of all
- * multikey paths encountered to a set. We then scan the index for multikey metadata path
- * entries and remove any path encountered. As we expect the index to contain a super-set of
- * the collection paths, a non-empty set represents an invalid index.
- */
- void addMultikeyMetadataPath(const KeyString::Value& ks, IndexInfo* indexInfo);
- void removeMultikeyMetadataPath(const KeyString::Value& ks, IndexInfo* indexInfo);
- size_t getMultikeyMetadataPathCount(IndexInfo* indexInfo);
+protected:
+ struct IndexKeyBucket {
+ uint32_t indexKeyCount;
+ uint32_t bucketSizeBytes;
+ };
+
+ CollectionValidation::ValidateState* _validateState;
+
+ // We map the hashed KeyString values to a bucket that contains the count of how many
+ // index keys and document keys we've seen in each bucket. This counter is unsigned to avoid
+ // undefined behavior in the (unlikely) case of overflow.
+ // Count rules:
+ // - If the count is non-zero for a bucket after all documents and index entries have been
+ // processed, one or more indexes are inconsistent for KeyStrings that map to it.
+ // Otherwise, those keys are consistent for all indexes with a high degree of confidence.
+    //   - Absent overflow, if a count interpreted as a two's complement integer ends up greater
+ // than zero, there are too few index entries.
+ // - Similarly, if that count ends up less than zero, there are too many index entries.
+
+ std::vector<IndexKeyBucket> _indexKeyBuckets;
+
+ // Whether we're in the first or second phase of index validation.
+ bool _firstPhase;
+
+private:
+ IndexConsistency() = delete;
+}; // IndexConsistency
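
A quick worked example of those counting rules: a bucket that receives three document keys (+3) and two index entries (-2) ends at 1, meaning one missing index entry among the keys hashing there; a bucket reading 0xFFFFFFFF is the unsigned wrap of -1, i.e. one extra index entry. The second phase then re-examines only the keys that land in nonzero buckets to identify the exact offenders.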
+
+/**
+ * The KeyStringIndexConsistency class is used to keep track of the index consistency for
+ * KeyString based indexes. It does this by using the index keys from index entries and index keys
+ * generated from the document to ensure there is a one-to-one mapping for each key. In addition, an
+ * IndexObserver class can be hooked into the IndexAccessMethod to inform this class about changes
+ * to the indexes during a validation and compensate for them.
+ */
+class KeyStringIndexConsistency final : protected IndexConsistency {
+ using IndexInfoMap = std::map<std::string, IndexInfo>;
+ using IndexKey = std::pair<std::string, std::string>;
+
+public:
+ KeyStringIndexConsistency(OperationContext* opCtx,
+ CollectionValidation::ValidateState* validateState,
+ size_t numHashBuckets = kNumHashBuckets);
+
+ void setSecondPhase() {
+ IndexConsistency::setSecondPhase();
+ }
/**
- * Returns true if any value in the `_indexKeyCount` map is not equal to 0, otherwise
- * return false.
+     * Traverses the index via 'cursor' and accumulates the traversal results.
*/
- bool haveEntryMismatch() const;
+ int64_t traverseIndex(OperationContext* opCtx,
+ const IndexCatalogEntry* index,
+ ProgressMeterHolder& _progress,
+ ValidateResults* results);
/**
- * Return info on all indexes tracked by this.
+ * Traverses all paths in a single record from the row-store via the given {'recordId','record'}
+ * pair and accumulates the traversal results.
*/
- IndexInfoMap& getIndexInfo() {
- return _indexesInfo;
- }
- IndexInfo& getIndexInfo(const std::string& indexName) {
- return _indexesInfo.at(indexName);
- }
+ void traverseRecord(OperationContext* opCtx,
+ const CollectionPtr& coll,
+ const IndexCatalogEntry* index,
+ const RecordId& recordId,
+ const BSONObj& recordBson,
+ ValidateResults* results);
/**
- * Informs the IndexConsistency object that we're advancing to the second phase of index
- * validation.
+     * Returns true if any bucket in `_indexKeyBuckets` has a non-zero count, and false
+     * otherwise.
*/
- void setSecondPhase();
+ bool haveEntryMismatch() const;
/**
* If repair mode enabled, try inserting _missingIndexEntries into indexes.
*/
- void repairMissingIndexEntries(OperationContext* opCtx, ValidateResults* results);
+ void repairIndexEntries(OperationContext* opCtx, ValidateResults* results);
/**
* Records the errors gathered from the second phase of index validation into the provided
* ValidateResultsMap and ValidateResults.
*/
- void addIndexEntryErrors(ValidateResults* results);
+ void addIndexEntryErrors(OperationContext* opCtx, ValidateResults* results);
/**
* Sets up this IndexConsistency object to limit memory usage in the second phase of index
@@ -177,35 +203,21 @@ public:
*/
bool limitMemoryUsageForSecondPhase(ValidateResults* result);
-private:
- struct IndexKeyBucket {
- uint32_t indexKeyCount;
- uint32_t bucketSizeBytes;
- };
-
- IndexConsistency() = delete;
+ void validateIndexKeyCount(OperationContext* opCtx,
+ const IndexCatalogEntry* index,
+ long long* numRecords,
+ IndexValidateResults& results);
- CollectionValidation::ValidateState* _validateState;
-
- // We map the hashed KeyString values to a bucket that contains the count of how many
- // index keys and document keys we've seen in each bucket. This counter is unsigned to avoid
- // undefined behavior in the (unlikely) case of overflow.
- // Count rules:
- // - If the count is non-zero for a bucket after all documents and index entries have been
- // processed, one or more indexes are inconsistent for KeyStrings that map to it.
- // Otherwise, those keys are consistent for all indexes with a high degree of confidence.
- // - Absent overflow, if a count interpreted as twos complement integer ends up greater
- // than zero, there are too few index entries.
- // - Similarly, if that count ends up less than zero, there are too many index entries.
+ uint64_t getTotalIndexKeys() {
+ return _totalIndexKeys;
+ }
- std::vector<IndexKeyBucket> _indexKeyBuckets;
+private:
+ KeyStringIndexConsistency() = delete;
// A vector of IndexInfo indexes by index number
IndexInfoMap _indexesInfo;
- // Whether we're in the first or second phase of index validation.
- bool _firstPhase;
-
// Populated during the second phase of validation, this map contains the index entries that
// were pointing at an invalid document key.
// The map contains a IndexKey pointing at a set of BSON objects as there may be multiple
@@ -218,6 +230,55 @@ private:
// index entry for a given IndexKey for each index.
std::map<IndexKey, IndexEntryInfo> _missingIndexEntries;
+ // The total number of index keys is stored during the first validation phase, since this
+ // count may change during a second phase.
+ uint64_t _totalIndexKeys = 0;
+
+ /**
+ * Return info for an index tracked by this with the given 'indexName'.
+ */
+ IndexInfo& getIndexInfo(const std::string& indexName) {
+ return _indexesInfo.at(indexName);
+ }
+
+ /**
+ * During the first phase of validation, given the document's key KeyString, increment the
+ * corresponding `_indexKeyCount` by hashing it.
+ * For the second phase of validation, keep track of the document keys that hashed to
+ * inconsistent hash buckets during the first phase of validation.
+ */
+ void addDocKey(OperationContext* opCtx,
+ const KeyString::Value& ks,
+ IndexInfo* indexInfo,
+ const RecordId& recordId);
+
+ /**
+ * During the first phase of validation, given the index entry's KeyString, decrement the
+ * corresponding `_indexKeyCount` by hashing it.
+ * For the second phase of validation, try to match the index entry keys that hashed to
+ * inconsistent hash buckets during the first phase of validation to document keys.
+ */
+ void addIndexKey(OperationContext* opCtx,
+ const KeyString::Value& ks,
+ IndexInfo* indexInfo,
+ const RecordId& recordId,
+ ValidateResults* results);
+
+ /**
+ * During the first phase of validation, tracks the multikey paths for every observed document.
+ */
+ void addDocumentMultikeyPaths(IndexInfo* indexInfo, const MultikeyPaths& multikeyPaths);
+
+ /**
+ * To validate $** multikey metadata paths, we first scan the collection and add a hash of all
+ * multikey paths encountered to a set. We then scan the index for multikey metadata path
+ * entries and remove any path encountered. As we expect the index to contain a super-set of
+ * the collection paths, a non-empty set represents an invalid index.
+ */
+ void addMultikeyMetadataPath(const KeyString::Value& ks, IndexInfo* indexInfo);
+ void removeMultikeyMetadataPath(const KeyString::Value& ks, IndexInfo* indexInfo);
+ size_t getMultikeyMetadataPathCount(IndexInfo* indexInfo);
+
/**
* Generates a key for the second phase of validation. The keys format is the following:
* {
@@ -241,5 +302,5 @@ private:
*/
uint32_t _hashKeyString(const KeyString::Value& ks, uint32_t indexNameHash) const;
-}; // IndexConsistency
+}; // KeyStringIndexConsistency
} // namespace mongo
diff --git a/src/mongo/db/catalog/validate_adaptor.cpp b/src/mongo/db/catalog/validate_adaptor.cpp
index 64e2f3fe151..035f5be1eb2 100644
--- a/src/mongo/db/catalog/validate_adaptor.cpp
+++ b/src/mongo/db/catalog/validate_adaptor.cpp
@@ -39,11 +39,13 @@
#include "mongo/bson/util/bsoncolumn.h"
#include "mongo/db/catalog/clustered_collection_util.h"
#include "mongo/db/catalog/collection.h"
+#include "mongo/db/catalog/column_index_consistency.h"
#include "mongo/db/catalog/index_catalog.h"
#include "mongo/db/catalog/index_consistency.h"
#include "mongo/db/catalog/throttle_cursor.h"
#include "mongo/db/concurrency/exception_util.h"
#include "mongo/db/curop.h"
+#include "mongo/db/index/columns_access_method.h"
#include "mongo/db/index/index_access_method.h"
#include "mongo/db/index/index_descriptor.h"
#include "mongo/db/index/wildcard_access_method.h"
@@ -71,11 +73,8 @@ namespace mongo {
namespace {
-MONGO_FAIL_POINT_DEFINE(crashOnMultikeyValidateFailure);
-
// Set limit for size of corrupted records that will be reported.
const long long kMaxErrorSizeBytes = 1 * 1024 * 1024;
-const long long kInterruptIntervalNumRecords = 4096;
const long long kInterruptIntervalNumBytes = 50 * 1024 * 1024; // 50MB.
static constexpr const char* kSchemaValidationFailedReason =
@@ -575,373 +574,19 @@ Status ValidateAdaptor::validateRecord(OperationContext* opCtx,
results);
}
- auto& executionCtx = StorageExecutionContext::get(opCtx);
SharedBufferFragmentBuilder pool(KeyString::HeapBuilder::kHeapAllocatorDefaultBytes);
for (const auto& index : _validateState->getIndexes()) {
const IndexDescriptor* descriptor = index->descriptor();
- auto iam = index->accessMethod()->asSortedData();
-
if (descriptor->isPartial() && !index->getFilterExpression()->matchesBSON(recordBson))
continue;
- auto documentKeySet = executionCtx.keys();
- auto multikeyMetadataKeys = executionCtx.multikeyMetadataKeys();
- auto documentMultikeyPaths = executionCtx.multikeyPaths();
-
- iam->getKeys(opCtx,
- coll,
- pool,
- recordBson,
- InsertDeleteOptions::ConstraintEnforcementMode::kEnforceConstraints,
- SortedDataIndexAccessMethod::GetKeysContext::kAddingKeys,
- documentKeySet.get(),
- multikeyMetadataKeys.get(),
- documentMultikeyPaths.get(),
- recordId);
-
- bool shouldBeMultikey = iam->shouldMarkIndexAsMultikey(
- documentKeySet->size(),
- {multikeyMetadataKeys->begin(), multikeyMetadataKeys->end()},
- *documentMultikeyPaths);
-
- if (!index->isMultikey(opCtx, coll) && shouldBeMultikey) {
- if (_validateState->fixErrors()) {
- writeConflictRetry(opCtx, "setIndexAsMultikey", coll->ns().ns(), [&] {
- WriteUnitOfWork wuow(opCtx);
- coll->getIndexCatalog()->setMultikeyPaths(
- opCtx, coll, descriptor, *multikeyMetadataKeys, *documentMultikeyPaths);
- wuow.commit();
- });
-
- LOGV2(4614700,
- "Index set to multikey",
- "indexName"_attr = descriptor->indexName(),
- "collection"_attr = coll->ns().ns());
- results->warnings.push_back(str::stream() << "Index " << descriptor->indexName()
- << " set to multikey.");
- results->repaired = true;
- } else {
- auto& curRecordResults = (results->indexResultsMap)[descriptor->indexName()];
- std::string msg = fmt::format(
- "Index {} is not multikey but has more than one key in document with "
- "RecordId({}) and {}",
- descriptor->indexName(),
- recordId.toString(),
- recordBson.getField("_id").toString());
- curRecordResults.errors.push_back(msg);
- curRecordResults.valid = false;
- if (crashOnMultikeyValidateFailure.shouldFail()) {
- invariant(false, msg);
- }
- }
- }
-
- if (index->isMultikey(opCtx, coll)) {
- const MultikeyPaths& indexPaths = index->getMultikeyPaths(opCtx, coll);
- if (!MultikeyPathTracker::covers(indexPaths, *documentMultikeyPaths.get())) {
- if (_validateState->fixErrors()) {
- writeConflictRetry(opCtx, "increaseMultikeyPathCoverage", coll->ns().ns(), [&] {
- WriteUnitOfWork wuow(opCtx);
- coll->getIndexCatalog()->setMultikeyPaths(
- opCtx, coll, descriptor, *multikeyMetadataKeys, *documentMultikeyPaths);
- wuow.commit();
- });
-
- LOGV2(4614701,
- "Multikey paths updated to cover multikey document",
- "indexName"_attr = descriptor->indexName(),
- "collection"_attr = coll->ns().ns());
- results->warnings.push_back(str::stream() << "Index " << descriptor->indexName()
- << " multikey paths updated.");
- results->repaired = true;
- } else {
- std::string msg = fmt::format(
- "Index {} multikey paths do not cover a document with RecordId({}) and {}",
- descriptor->indexName(),
- recordId.toString(),
- recordBson.getField("_id").toString());
- auto& curRecordResults = (results->indexResultsMap)[descriptor->indexName()];
- curRecordResults.errors.push_back(msg);
- curRecordResults.valid = false;
- }
- }
- }
-
- IndexInfo& indexInfo = _indexConsistency->getIndexInfo(descriptor->indexName());
- if (shouldBeMultikey) {
- indexInfo.multikeyDocs = true;
- }
-
- // An empty set of multikey paths indicates that this index does not track path-level
- // multikey information and we should do no tracking.
- if (shouldBeMultikey && documentMultikeyPaths->size()) {
- _indexConsistency->addDocumentMultikeyPaths(&indexInfo, *documentMultikeyPaths);
- }
-
- for (const auto& keyString : *multikeyMetadataKeys) {
- try {
- _indexConsistency->addMultikeyMetadataPath(keyString, &indexInfo);
- } catch (...) {
- return exceptionToStatus();
- }
- }
- for (const auto& keyString : *documentKeySet) {
- try {
- _totalIndexKeys++;
- _indexConsistency->addDocKey(opCtx, keyString, &indexInfo, recordId);
- } catch (...) {
- return exceptionToStatus();
- }
- }
+ this->traverseRecord(opCtx, coll, index.get(), recordId, recordBson, results);
}
return Status::OK();
}
-namespace {
-// Ensures that index entries are in increasing or decreasing order.
-void _validateKeyOrder(OperationContext* opCtx,
- const IndexCatalogEntry* index,
- const KeyString::Value& currKey,
- const KeyString::Value& prevKey,
- IndexValidateResults* results) {
- auto descriptor = index->descriptor();
- bool unique = descriptor->unique();
-
- // KeyStrings will be in strictly increasing order because all keys are sorted and they are in
- // the format (Key, RID), and all RecordIDs are unique.
- if (currKey.compare(prevKey) <= 0) {
- if (results && results->valid) {
- results->errors.push_back(str::stream()
- << "index '" << descriptor->indexName()
- << "' is not in strictly ascending or descending order");
- }
- if (results) {
- results->valid = false;
- }
- return;
- }
-
- if (unique) {
- // Unique indexes must not have duplicate keys.
- int cmp = currKey.compareWithoutRecordIdLong(prevKey);
- if (cmp != 0) {
- return;
- }
-
- if (results && results->valid) {
- auto bsonKey = KeyString::toBson(currKey, Ordering::make(descriptor->keyPattern()));
- auto firstRecordId =
- KeyString::decodeRecordIdLongAtEnd(prevKey.getBuffer(), prevKey.getSize());
- auto secondRecordId =
- KeyString::decodeRecordIdLongAtEnd(currKey.getBuffer(), currKey.getSize());
- results->errors.push_back(str::stream() << "Unique index '" << descriptor->indexName()
- << "' has duplicate key: " << bsonKey
- << ", first record: " << firstRecordId
- << ", second record: " << secondRecordId);
- }
- if (results) {
- results->valid = false;
- }
- }
-}
-} // namespace
-
-void ValidateAdaptor::traverseIndex(OperationContext* opCtx,
- const IndexCatalogEntry* index,
- int64_t* numTraversedKeys,
- ValidateResults* results) {
- const IndexDescriptor* descriptor = index->descriptor();
- auto indexName = descriptor->indexName();
- auto& indexResults = results->indexResultsMap[indexName];
- IndexInfo& indexInfo = _indexConsistency->getIndexInfo(indexName);
- int64_t numKeys = 0;
-
- bool isFirstEntry = true;
-
- // The progress meter will be inactive after traversing the record store to allow the message
- // and the total to be set to different values.
- if (!_progress.get(WithLock::withoutLock())->isActive()) {
- const char* curopMessage = "Validate: scanning index entries";
- stdx::unique_lock<Client> lk(*opCtx->getClient());
- _progress.set(
- lk, CurOp::get(opCtx)->setProgress_inlock(curopMessage, _totalIndexKeys), opCtx);
- }
-
- const KeyString::Version version =
- index->accessMethod()->asSortedData()->getSortedDataInterface()->getKeyStringVersion();
-
- KeyString::Builder firstKeyStringBuilder(
- version, BSONObj(), indexInfo.ord, KeyString::Discriminator::kExclusiveBefore);
- KeyString::Value firstKeyString = firstKeyStringBuilder.getValueCopy();
- KeyString::Value prevIndexKeyStringValue;
-
- // Ensure that this index has an open index cursor.
- const auto indexCursorIt = _validateState->getIndexCursors().find(indexName);
- invariant(indexCursorIt != _validateState->getIndexCursors().end());
-
- const std::unique_ptr<SortedDataInterfaceThrottleCursor>& indexCursor = indexCursorIt->second;
-
- boost::optional<KeyStringEntry> indexEntry;
- try {
- indexEntry = indexCursor->seekForKeyString(opCtx, firstKeyString);
- } catch (const DBException& ex) {
- if (TestingProctor::instance().isEnabled() && ex.code() != ErrorCodes::WriteConflict) {
- LOGV2_FATAL(5318400,
- "Error seeking to first key",
- "error"_attr = ex.toString(),
- "index"_attr = indexName,
- "key"_attr = firstKeyString.toString());
- }
- throw;
- }
-
- const auto keyFormat =
- index->accessMethod()->asSortedData()->getSortedDataInterface()->rsKeyFormat();
- const RecordId kWildcardMultikeyMetadataRecordId = record_id_helpers::reservedIdFor(
- record_id_helpers::ReservationId::kWildcardMultikeyMetadataId, keyFormat);
-
- // Warn about unique indexes with keys in old format (without record id).
- bool foundOldUniqueIndexKeys = false;
-
- while (indexEntry) {
- if (!isFirstEntry) {
- _validateKeyOrder(
- opCtx, index, indexEntry->keyString, prevIndexKeyStringValue, &indexResults);
- }
-
- if (!foundOldUniqueIndexKeys && !descriptor->isIdIndex() && descriptor->unique() &&
- !indexCursor->isRecordIdAtEndOfKeyString()) {
- results->warnings.push_back(
- fmt::format("Unique index {} has one or more keys in the old format (without "
- "embedded record id). First record: {}",
- indexInfo.indexName,
- indexEntry->loc.toString()));
- foundOldUniqueIndexKeys = true;
- }
-
- bool isMetadataKey = indexEntry->loc == kWildcardMultikeyMetadataRecordId;
- if (descriptor->getIndexType() == IndexType::INDEX_WILDCARD && isMetadataKey) {
- _indexConsistency->removeMultikeyMetadataPath(indexEntry->keyString, &indexInfo);
- } else {
- try {
- _indexConsistency->addIndexKey(
- opCtx, indexEntry->keyString, &indexInfo, indexEntry->loc, results);
- } catch (const DBException& e) {
- StringBuilder ss;
- ss << "Parsing index key for " << indexInfo.indexName << " recId "
- << indexEntry->loc << " threw exception " << e.toString();
- results->errors.push_back(ss.str());
- results->valid = false;
- }
- }
- {
- stdx::unique_lock<Client> lk(*opCtx->getClient());
- _progress.get(lk)->hit();
- }
- numKeys++;
- isFirstEntry = false;
- prevIndexKeyStringValue = indexEntry->keyString;
-
- if (numKeys % kInterruptIntervalNumRecords == 0) {
- // Periodically checks for interrupts and yields.
- opCtx->checkForInterrupt();
- _validateState->yield(opCtx);
- }
-
- try {
- indexEntry = indexCursor->nextKeyString(opCtx);
- } catch (const DBException& ex) {
- if (TestingProctor::instance().isEnabled() && ex.code() != ErrorCodes::WriteConflict) {
- LOGV2_FATAL(5318401,
- "Error advancing index cursor",
- "error"_attr = ex.toString(),
- "index"_attr = indexName,
- "prevKey"_attr = prevIndexKeyStringValue.toString());
- }
- throw;
- }
- }
-
- if (results && _indexConsistency->getMultikeyMetadataPathCount(&indexInfo) > 0) {
- results->errors.push_back(str::stream()
- << "Index '" << descriptor->indexName()
- << "' has one or more missing multikey metadata index keys");
- results->valid = false;
- }
-
- // Adjust multikey metadata when allowed. These states are all allowed by the design of
- // multikey. A collection should still be valid without these adjustments.
- if (_validateState->adjustMultikey()) {
-
- // If this collection has documents that make this index multikey, then check whether those
- // multikey paths match the index's metadata.
- auto indexPaths = index->getMultikeyPaths(opCtx, _validateState->getCollection());
- auto& documentPaths = indexInfo.docMultikeyPaths;
- if (indexInfo.multikeyDocs && documentPaths != indexPaths) {
- LOGV2(5367500,
- "Index's multikey paths do not match those of its documents",
- "index"_attr = descriptor->indexName(),
- "indexPaths"_attr = MultikeyPathTracker::dumpMultikeyPaths(indexPaths),
- "documentPaths"_attr = MultikeyPathTracker::dumpMultikeyPaths(documentPaths));
-
- // Since we have the correct multikey path information for this index, we can tighten up
- // its metadata to improve query performance. This may apply in two distinct scenarios:
- // 1. Collection data has changed such that the current multikey paths on the index
- // are too permissive and certain document paths are no longer multikey.
- // 2. This index was built before 3.4, and there is no multikey path information for
- // the index. We can effectively 'upgrade' the index so that it does not need to be
- // rebuilt to update this information.
- writeConflictRetry(opCtx, "updateMultikeyPaths", _validateState->nss().ns(), [&]() {
- WriteUnitOfWork wuow(opCtx);
- auto writeableIndex = const_cast<IndexCatalogEntry*>(index);
- const bool isMultikey = true;
- writeableIndex->forceSetMultikey(
- opCtx, _validateState->getCollection(), isMultikey, documentPaths);
- wuow.commit();
- });
-
- if (results) {
- results->warnings.push_back(str::stream() << "Updated index multikey metadata"
- << ": " << descriptor->indexName());
- results->repaired = true;
- }
- }
-
- // If this index does not need to be multikey, then unset the flag.
- if (index->isMultikey(opCtx, _validateState->getCollection()) && !indexInfo.multikeyDocs) {
- invariant(!indexInfo.docMultikeyPaths.size());
-
- LOGV2(5367501,
- "Index is multikey but there are no multikey documents",
- "index"_attr = descriptor->indexName());
-
- // This makes an improvement in the case that no documents make the index multikey and
- // the flag can be unset entirely. This may be due to a change in the data or historical
-        // multikey bugs that have persisted incorrect multikey information.
- writeConflictRetry(opCtx, "unsetMultikeyPaths", _validateState->nss().ns(), [&]() {
- WriteUnitOfWork wuow(opCtx);
- auto writeableIndex = const_cast<IndexCatalogEntry*>(index);
- const bool isMultikey = false;
- writeableIndex->forceSetMultikey(
- opCtx, _validateState->getCollection(), isMultikey, {});
- wuow.commit();
- });
-
- if (results) {
- results->warnings.push_back(str::stream() << "Unset index multikey metadata"
- << ": " << descriptor->indexName());
- results->repaired = true;
- }
- }
- }
-
- if (numTraversedKeys) {
- *numTraversedKeys = numKeys;
- }
-}
-
void ValidateAdaptor::traverseRecordStore(OperationContext* opCtx,
ValidateResults* results,
BSONObjBuilder* output) {
@@ -1093,7 +738,7 @@ void ValidateAdaptor::traverseRecordStore(OperationContext* opCtx,
prevRecordId = record->id;
- if (_numRecords % kInterruptIntervalNumRecords == 0 ||
+ if (_numRecords % IndexConsistency::kInterruptIntervalNumRecords == 0 ||
interruptIntervalNumBytes >= kInterruptIntervalNumBytes) {
// Periodically checks for interrupts and yields.
opCtx->checkForInterrupt();
@@ -1125,78 +770,90 @@ void ValidateAdaptor::traverseRecordStore(OperationContext* opCtx,
}
}
+bool isColumnStoreIndex(const IndexCatalogEntry* index) {
+ return index->descriptor()->getAccessMethodName() == IndexNames::COLUMN;
+}
+
void ValidateAdaptor::validateIndexKeyCount(OperationContext* opCtx,
const IndexCatalogEntry* index,
IndexValidateResults& results) {
- // Fetch the total number of index entries we previously found traversing the index.
- const IndexDescriptor* desc = index->descriptor();
- const std::string indexName = desc->indexName();
- IndexInfo* indexInfo = &_indexConsistency->getIndexInfo(indexName);
- auto numTotalKeys = indexInfo->numKeys;
-
- // Update numRecords by subtracting number of records removed from record store in repair mode
- // when validating index consistency
- _numRecords -= results.keysRemovedFromRecordStore;
-
- // Do not fail on finding too few index entries compared to collection entries when full:false.
- bool hasTooFewKeys = false;
- bool noErrorOnTooFewKeys = !_validateState->isFullIndexValidation();
-
- if (desc->isIdIndex() && numTotalKeys != _numRecords) {
- hasTooFewKeys = (numTotalKeys < _numRecords);
- std::string msg = str::stream()
- << "number of _id index entries (" << numTotalKeys
- << ") does not match the number of documents in the index (" << _numRecords << ")";
- if (noErrorOnTooFewKeys && (numTotalKeys < _numRecords)) {
- results.warnings.push_back(msg);
- } else {
- results.errors.push_back(msg);
- results.valid = false;
- }
+ if (isColumnStoreIndex(index)) {
+ _columnIndexConsistency.validateIndexKeyCount(opCtx, index, &_numRecords, results);
+ } else {
+ _keyBasedIndexConsistency.validateIndexKeyCount(opCtx, index, &_numRecords, results);
}
+}
- // Hashed indexes may never be multikey.
- if (desc->getAccessMethodName() == IndexNames::HASHED &&
- index->isMultikey(opCtx, _validateState->getCollection())) {
- results.errors.push_back(str::stream() << "Hashed index is incorrectly marked multikey: "
- << desc->indexName());
- results.valid = false;
+void ValidateAdaptor::traverseIndex(OperationContext* opCtx,
+ const IndexCatalogEntry* index,
+ int64_t* numTraversedKeys,
+ ValidateResults* results) {
+ // The progress meter will be inactive after traversing the record store to allow the message
+ // and the total to be set to different values.
+ if (!_progress.get(WithLock::withoutLock())->isActive()) {
+ const char* curopMessage = "Validate: scanning index entries";
+ stdx::unique_lock<Client> lk(*opCtx->getClient());
+ _progress.set(lk,
+ CurOp::get(opCtx)->setProgress_inlock(
+ curopMessage,
+ isColumnStoreIndex(index)
+ ? _columnIndexConsistency.getTotalIndexKeys()
+ : _keyBasedIndexConsistency.getTotalIndexKeys()),
+ opCtx);
}
- // Confirm that the number of index entries is not greater than the number of documents in the
- // collection. This check is only valid for indexes that are not multikey (indexed arrays
- // produce an index key per array entry) and not $** indexes which can produce index keys for
- // multiple paths within a single document.
- if (results.valid && !index->isMultikey(opCtx, _validateState->getCollection()) &&
- desc->getIndexType() != IndexType::INDEX_WILDCARD && numTotalKeys > _numRecords) {
- std::string err = str::stream()
- << "index " << desc->indexName() << " is not multi-key, but has more entries ("
- << numTotalKeys << ") than documents in the index (" << _numRecords << ")";
- results.errors.push_back(err);
- results.valid = false;
+ int64_t numKeys = 0;
+ if (isColumnStoreIndex(index)) {
+ numKeys += _columnIndexConsistency.traverseIndex(opCtx, index, _progress, results);
+ } else {
+ numKeys += _keyBasedIndexConsistency.traverseIndex(opCtx, index, _progress, results);
}
- // Ignore any indexes with a special access method. If an access method name is given, the
- // index may be a full text, geo or special index plugin with different semantics.
- if (results.valid && !desc->isSparse() && !desc->isPartial() && !desc->isIdIndex() &&
- desc->getAccessMethodName() == "" && numTotalKeys < _numRecords) {
- hasTooFewKeys = true;
- std::string msg = str::stream()
- << "index " << desc->indexName() << " is not sparse or partial, but has fewer entries ("
- << numTotalKeys << ") than documents in the index (" << _numRecords << ")";
- if (noErrorOnTooFewKeys) {
- results.warnings.push_back(msg);
- } else {
- results.errors.push_back(msg);
- results.valid = false;
- }
+ if (numTraversedKeys) {
+ *numTraversedKeys = numKeys;
}
+}
- if (!_validateState->isFullIndexValidation() && hasTooFewKeys) {
- std::string warning = str::stream()
- << "index " << desc->indexName() << " has fewer keys than records."
- << " Please re-run the validate command with {full: true}";
- results.warnings.push_back(warning);
+void ValidateAdaptor::traverseRecord(OperationContext* opCtx,
+ const CollectionPtr& coll,
+ const IndexCatalogEntry* index,
+ const RecordId& recordId,
+ const BSONObj& record,
+ ValidateResults* results) {
+ if (isColumnStoreIndex(index)) {
+ _columnIndexConsistency.traverseRecord(opCtx, coll, index, recordId, record, results);
+ } else {
+ _keyBasedIndexConsistency.traverseRecord(opCtx, coll, index, recordId, record, results);
}
}
+
+void ValidateAdaptor::setSecondPhase() {
+ _columnIndexConsistency.setSecondPhase();
+ _keyBasedIndexConsistency.setSecondPhase();
+}
+
+bool ValidateAdaptor::limitMemoryUsageForSecondPhase(ValidateResults* result) {
+ bool retVal = true;
+ retVal &= _columnIndexConsistency.limitMemoryUsageForSecondPhase(result);
+ retVal &= _keyBasedIndexConsistency.limitMemoryUsageForSecondPhase(result);
+ return retVal;
+}
+
+bool ValidateAdaptor::haveEntryMismatch() const {
+ bool retVal = false;
+ retVal |= _columnIndexConsistency.haveEntryMismatch();
+ retVal |= _keyBasedIndexConsistency.haveEntryMismatch();
+ return retVal;
+}
+
+void ValidateAdaptor::repairIndexEntries(OperationContext* opCtx, ValidateResults* results) {
+ _columnIndexConsistency.repairIndexEntries(opCtx, results);
+ _keyBasedIndexConsistency.repairIndexEntries(opCtx, results);
+}
+
+void ValidateAdaptor::addIndexEntryErrors(OperationContext* opCtx, ValidateResults* results) {
+ _columnIndexConsistency.addIndexEntryErrors(opCtx, results);
+ _keyBasedIndexConsistency.addIndexEntryErrors(opCtx, results);
+}
+
} // namespace mongo
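
The net effect of the validate_adaptor.cpp changes above is that ValidateAdaptor no longer walks index keys itself; it routes every per-index operation to one of two consistency checkers and combines their answers. A minimal standalone sketch of that dispatch shape, assuming illustrative names throughout (none of the classes below are the real MongoDB types):

```cpp
#include <iostream>
#include <string>

// Two checkers with a common interface, standing in for the key-string and
// column-store consistency objects.
class Checker {
public:
    virtual ~Checker() = default;
    virtual void traverseRecord(const std::string& indexName) = 0;
    virtual bool haveEntryMismatch() const = 0;
};

class KeyStringChecker : public Checker {
public:
    void traverseRecord(const std::string& indexName) override {
        std::cout << "keystring pass over " << indexName << '\n';
    }
    bool haveEntryMismatch() const override { return false; }
};

class ColumnChecker : public Checker {
public:
    void traverseRecord(const std::string& indexName) override {
        std::cout << "column-store pass over " << indexName << '\n';
    }
    bool haveEntryMismatch() const override { return false; }
};

// The adaptor owns one checker of each kind, routes per-index work by access
// method (as isColumnStoreIndex() does above), and answers aggregate queries
// by consulting both.
class Adaptor {
public:
    void traverseRecord(const std::string& indexName, const std::string& accessMethod) {
        (accessMethod == "columnstore" ? static_cast<Checker&>(_column)
                                       : static_cast<Checker&>(_keyString))
            .traverseRecord(indexName);
    }
    bool haveEntryMismatch() const {
        return _column.haveEntryMismatch() || _keyString.haveEntryMismatch();
    }

private:
    KeyStringChecker _keyString;
    ColumnChecker _column;
};

int main() {
    Adaptor adaptor;
    adaptor.traverseRecord("a_1", "btree");
    adaptor.traverseRecord("$**_columnstore", "columnstore");
    std::cout << std::boolalpha << adaptor.haveEntryMismatch() << '\n';
    return 0;
}
```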
diff --git a/src/mongo/db/catalog/validate_adaptor.h b/src/mongo/db/catalog/validate_adaptor.h
index 3217c7b5824..07d05164623 100644
--- a/src/mongo/db/catalog/validate_adaptor.h
+++ b/src/mongo/db/catalog/validate_adaptor.h
@@ -29,12 +29,15 @@
#pragma once
+#include <array>
+
+#include "mongo/db/catalog/column_index_consistency.h"
+#include "mongo/db/catalog/index_consistency.h"
#include "mongo/db/catalog/validate_state.h"
#include "mongo/util/progress_meter.h"
namespace mongo {
-class IndexConsistency;
class IndexDescriptor;
class OperationContext;
@@ -44,10 +47,11 @@ class OperationContext;
*/
class ValidateAdaptor {
public:
- ValidateAdaptor(IndexConsistency* indexConsistency,
- CollectionValidation::ValidateState* validateState)
+ ValidateAdaptor(OperationContext* opCtx, CollectionValidation::ValidateState* validateState)
- : _indexConsistency(indexConsistency), _validateState(validateState) {}
+ : _keyBasedIndexConsistency(opCtx, validateState),
+ _columnIndexConsistency(opCtx, validateState),
+ _validateState(validateState) {}
/**
* Validates the record data and traverses through its key set to keep track of the
@@ -59,6 +63,13 @@ public:
long long* nNonCompliantDocuments,
size_t* dataSize,
ValidateResults* results);
+ /**
+ * Traverses the record store to retrieve every record and go through its document key
+ * set to keep track of the index consistency during a validation.
+ */
+ void traverseRecordStore(OperationContext* opCtx,
+ ValidateResults* results,
+ BSONObjBuilder* output);
/**
* Traverses the index getting index entries to validate them and keep track of the index keys
@@ -70,12 +81,14 @@ public:
ValidateResults* results);
/**
- * Traverses the record store to retrieve every record and go through its document key
- * set to keep track of the index consistency during a validation.
+     * Passes a single record through the underlying index consistency objects for traversal.
*/
- void traverseRecordStore(OperationContext* opCtx,
- ValidateResults* results,
- BSONObjBuilder* output);
+ void traverseRecord(OperationContext* opCtx,
+ const CollectionPtr& coll,
+ const IndexCatalogEntry* index,
+ const RecordId& recordId,
+ const BSONObj& record,
+ ValidateResults* results);
/**
* Validates that the number of document keys matches the number of index keys previously
@@ -85,8 +98,38 @@ public:
const IndexCatalogEntry* index,
IndexValidateResults& results);
+ /**
+ * Informs the index consistency objects that we're advancing to the second phase of index
+ * validation.
+ */
+ void setSecondPhase();
+
+ /**
+ * Sets up the index consistency objects to limit memory usage in the second phase of index
+ * validation. Returns whether the memory limit is sufficient to report at least one index entry
+ * inconsistency and continue with the second phase of validation.
+ */
+ bool limitMemoryUsageForSecondPhase(ValidateResults* result);
+
+ /**
+ * Returns true if the underlying index consistency objects have entry mismatches.
+ */
+ bool haveEntryMismatch() const;
+
+ /**
+     * If repair mode is enabled, tries to insert any _missingIndexEntries into the indexes.
+ */
+ void repairIndexEntries(OperationContext* opCtx, ValidateResults* results);
+
+ /**
+     * Records the errors gathered from the second phase of index validation into the provided
+     * ValidateResults (including its per-index results map).
+ */
+ void addIndexEntryErrors(OperationContext* opCtx, ValidateResults* results);
+
private:
- IndexConsistency* _indexConsistency;
+ KeyStringIndexConsistency _keyBasedIndexConsistency;
+ ColumnIndexConsistency _columnIndexConsistency;
CollectionValidation::ValidateState* _validateState;
// Saves the record count from the record store traversal to be used later to validate the index
@@ -95,9 +138,5 @@ private:
// For reporting progress during record store and index traversal.
ProgressMeterHolder _progress;
-
- // The total number of index keys is stored during the first validation phase, since this
- // count may change during a second phase.
- uint64_t _totalIndexKeys = 0;
};
} // namespace mongo
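
The header now exposes the full two-phase surface in one place. As a hedged sketch of how a validation driver might sequence these calls (the real ordering lives in collection_validation.cpp and is an assumption here, inferred from the method comments above):

```cpp
#include <iostream>

// Stand-in for ValidateAdaptor with the phase-related API declared above.
struct Adaptor {
    void traverseRecordStore() { std::cout << "scan records\n"; }
    void traverseIndexes() { std::cout << "scan index entries\n"; }
    bool haveEntryMismatch() const { return true; }
    void setSecondPhase() { std::cout << "enter phase two\n"; }
    bool limitMemoryUsageForSecondPhase() { return true; }
    void repairIndexEntries() { std::cout << "repair\n"; }
    void addIndexEntryErrors() { std::cout << "report\n"; }
};

int main() {
    Adaptor adaptor;
    // Phase one: count keys seen from documents and from index entries.
    adaptor.traverseRecordStore();
    adaptor.traverseIndexes();
    if (adaptor.haveEntryMismatch()) {
        // Phase two re-scans to pin down the exact mismatching entries, but
        // only if that can be done within the memory budget.
        adaptor.setSecondPhase();
        if (adaptor.limitMemoryUsageForSecondPhase()) {
            adaptor.traverseRecordStore();
            adaptor.traverseIndexes();
            adaptor.repairIndexEntries();
            adaptor.addIndexEntryErrors();
        }
    }
    return 0;
}
```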
diff --git a/src/mongo/db/catalog/validate_state.cpp b/src/mongo/db/catalog/validate_state.cpp
index 9f0fd873f59..304f664005b 100644
--- a/src/mongo/db/catalog/validate_state.cpp
+++ b/src/mongo/db/catalog/validate_state.cpp
@@ -38,6 +38,7 @@
#include "mongo/db/catalog/index_consistency.h"
#include "mongo/db/catalog/validate_adaptor.h"
#include "mongo/db/db_raii.h"
+#include "mongo/db/index/columns_access_method.h"
#include "mongo/db/index/index_access_method.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/storage/durable_catalog.h"
@@ -192,6 +193,9 @@ void ValidateState::_yieldCursors(OperationContext* opCtx) {
for (const auto& indexCursor : _indexCursors) {
indexCursor.second->save();
}
+ for (const auto& indexCursor : _columnStoreIndexCursors) {
+ indexCursor.second->save();
+ }
_traverseRecordStoreCursor->save();
_seekRecordStoreCursor->save();
@@ -200,6 +204,9 @@ void ValidateState::_yieldCursors(OperationContext* opCtx) {
for (const auto& indexCursor : _indexCursors) {
indexCursor.second->restore();
}
+ for (const auto& indexCursor : _columnStoreIndexCursors) {
+ indexCursor.second->restore();
+ }
uassert(ErrorCodes::Interrupted,
"Interrupted due to: failure to restore yielded traverse cursor",
@@ -209,9 +216,16 @@ void ValidateState::_yieldCursors(OperationContext* opCtx) {
_seekRecordStoreCursor->restore());
}
+bool ValidateState::_isIndexDataCheckpointed(OperationContext* opCtx,
+ const IndexCatalogEntry* entry) {
+ return isBackground() &&
+ opCtx->getServiceContext()->getStorageEngine()->isInIndividuallyCheckpointedIndexes(
+ entry->getIdent());
+}
+
void ValidateState::initializeCursors(OperationContext* opCtx) {
invariant(!_traverseRecordStoreCursor && !_seekRecordStoreCursor && _indexCursors.size() == 0 &&
- _indexes.size() == 0);
+ _columnStoreIndexCursors.size() == 0 && _indexes.size() == 0);
// Background validation reads from the last stable checkpoint instead of the latest data. This
// allows concurrent writes to go ahead without interfering with validation's view of the data.
@@ -292,27 +306,51 @@ void ValidateState::initializeCursors(OperationContext* opCtx) {
continue;
}
- auto iam = entry->accessMethod()->asSortedData();
- if (!iam)
- continue;
-
- _indexCursors.emplace(
- desc->indexName(),
- std::make_unique<SortedDataInterfaceThrottleCursor>(opCtx, iam, &_dataThrottle));
+ if (entry->descriptor()->getAccessMethodName() == IndexNames::COLUMN) {
+ const auto iam = entry->accessMethod();
+ _columnStoreIndexCursors.emplace(
+ desc->indexName(),
+ static_cast<ColumnStoreAccessMethod*>(iam)->storage()->newCursor(opCtx));
+
+ // Skip any newly created indexes that, because they were built with a WT bulk loader,
+            // are checkpointed but not yet consistent with the rest of the checkpoint's PIT view of
+ // the data.
+ if (_isIndexDataCheckpointed(opCtx, entry)) {
+ _columnStoreIndexCursors.erase(desc->indexName());
+ LOGV2(
+ 7106110,
+ "Skipping background validation on the index because the index data is not yet "
+ "consistent in the checkpoint.",
+ "desc_indexName"_attr = desc->indexName(),
+ "nss"_attr = _nss);
+ continue;
+ }
+ } else {
+ const auto iam = entry->accessMethod()->asSortedData();
+ if (!iam) {
+ LOGV2(6325100,
+ "[Debugging] skipping index {index_name} because it isn't SortedData",
+ "index_name"_attr = desc->indexName());
+ continue;
+ }
- // Skip any newly created indexes that, because they were built with a WT bulk loader,
- // are checkpoint'ed but not yet consistent with the rest of checkpoint's PIT view of
- // the data.
- if (isBackground() &&
- opCtx->getServiceContext()->getStorageEngine()->isInIndividuallyCheckpointedIndexes(
- diskIndexIdent)) {
- _indexCursors.erase(desc->indexName());
- LOGV2(6868903,
- "Skipping background validation on the index because the index data is not yet "
- "consistent in the checkpoint.",
- "desc_indexName"_attr = desc->indexName(),
- "nss"_attr = _nss);
- continue;
+ _indexCursors.emplace(
+ desc->indexName(),
+ std::make_unique<SortedDataInterfaceThrottleCursor>(opCtx, iam, &_dataThrottle));
+
+ // Skip any newly created indexes that, because they were built with a WT bulk loader,
+            // are checkpointed but not yet consistent with the rest of the checkpoint's PIT view of
+ // the data.
+ if (_isIndexDataCheckpointed(opCtx, entry)) {
+ _indexCursors.erase(desc->indexName());
+ LOGV2(
+ 6868903,
+ "Skipping background validation on the index because the index data is not yet "
+ "consistent in the checkpoint.",
+ "desc_indexName"_attr = desc->indexName(),
+ "nss"_attr = _nss);
+ continue;
+ }
}
_indexes.push_back(indexCatalog->getEntryShared(desc));
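
Both branches of initializeCursors() follow the same open-then-maybe-discard shape: open a cursor, then drop it again if the index table was bulk-loaded after the checkpoint that background validation reads from. A self-contained sketch of that pattern, with plain containers standing in for the storage engine and cursor types:

```cpp
#include <iostream>
#include <map>
#include <memory>
#include <set>
#include <string>

// Models StorageEngine::isInIndividuallyCheckpointedIndexes().
bool isIndividuallyCheckpointed(const std::set<std::string>& checkpointed,
                                const std::string& ident) {
    return checkpointed.count(ident) > 0;
}

int main() {
    const bool background = true;
    const std::set<std::string> individuallyCheckpointed{"index-2-ident"};
    std::map<std::string, std::unique_ptr<int>> cursors;  // name -> fake cursor

    const std::map<std::string, std::string> indexes{{"a_1", "index-1-ident"},
                                                     {"b_1", "index-2-ident"}};
    for (const auto& [name, ident] : indexes) {
        cursors.emplace(name, std::make_unique<int>(0));  // open a cursor first
        // Mirrors _isIndexDataCheckpointed(): an index whose table was
        // bulk-loaded after the checkpoint is not yet consistent with the
        // checkpoint's view, so drop its cursor and skip it.
        if (background && isIndividuallyCheckpointed(individuallyCheckpointed, ident)) {
            cursors.erase(name);
            std::cout << "skipping " << name << '\n';
            continue;
        }
        std::cout << "validating " << name << '\n';
    }
    return 0;
}
```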
diff --git a/src/mongo/db/catalog/validate_state.h b/src/mongo/db/catalog/validate_state.h
index c508a6b5850..a5902ca52ff 100644
--- a/src/mongo/db/catalog/validate_state.h
+++ b/src/mongo/db/catalog/validate_state.h
@@ -148,8 +148,7 @@ public:
/**
* Map of index names to index cursors.
*/
- const std::map<std::string, std::unique_ptr<SortedDataInterfaceThrottleCursor>>&
- getIndexCursors() const {
+ const StringMap<std::unique_ptr<SortedDataInterfaceThrottleCursor>>& getIndexCursors() const {
return _indexCursors;
}
@@ -161,6 +160,10 @@ public:
return _seekRecordStoreCursor;
}
+ const StringMap<std::unique_ptr<ColumnStore::Cursor>>& getColumnStoreCursors() const {
+ return _columnStoreIndexCursors;
+ }
+
RecordId getFirstRecordId() const {
return _firstRecordId;
}
@@ -227,6 +230,8 @@ private:
*/
void _yieldCursors(OperationContext* opCtx);
+ bool _isIndexDataCheckpointed(OperationContext* opCtx, const IndexCatalogEntry* entry);
+
NamespaceString _nss;
ValidateMode _mode;
RepairMode _repairMode;
@@ -253,9 +258,10 @@ private:
std::vector<std::shared_ptr<const IndexCatalogEntry>> _indexes;
// Shared cursors to be used during validation, created in 'initializeCursors()'.
- std::map<std::string, std::unique_ptr<SortedDataInterfaceThrottleCursor>> _indexCursors;
+ StringMap<std::unique_ptr<SortedDataInterfaceThrottleCursor>> _indexCursors;
std::unique_ptr<SeekableRecordThrottleCursor> _traverseRecordStoreCursor;
std::unique_ptr<SeekableRecordThrottleCursor> _seekRecordStoreCursor;
+ StringMap<std::unique_ptr<ColumnStore::Cursor>> _columnStoreIndexCursors;
RecordId _firstRecordId;
diff --git a/src/mongo/db/index/columns_access_method.cpp b/src/mongo/db/index/columns_access_method.cpp
index 3aa913a0f19..96af1ddfd50 100644
--- a/src/mongo/db/index/columns_access_method.cpp
+++ b/src/mongo/db/index/columns_access_method.cpp
@@ -265,7 +265,6 @@ Status ColumnStoreAccessMethod::BulkBuilder::keyCommitted(
return Status::OK();
}
-
void ColumnStoreAccessMethod::_visitCellsForIndexInsert(
OperationContext* opCtx,
PooledFragmentBuilder& buf,
@@ -375,6 +374,9 @@ Status ColumnStoreAccessMethod::update(OperationContext* opCtx,
[&](column_keygen::ColumnKeyGenerator::DiffAction diffAction,
StringData path,
const column_keygen::UnencodedCellView* cell) {
+            // TODO SERVER-72351: Temporary fix; remove once the underlying issue is resolved.
+ ON_BLOCK_EXIT([&]() { columnKeys->clear(); });
+
if (diffAction == column_keygen::ColumnKeyGenerator::DiffAction::kDelete) {
columnKeys->emplace_back(std::make_tuple(path.toString(), "", rid));
int64_t removed = 0;
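
The ON_BLOCK_EXIT in the update() hunk is a scope guard: the lambda it wraps runs when the enclosing scope exits, on every path, normal or exceptional. A runnable sketch of the idiom with a hand-rolled guard (the real helper is ON_BLOCK_EXIT from mongo/util/scopeguard.h; ScopeGuard and processDiff below are illustrative):

```cpp
#include <iostream>
#include <utility>
#include <vector>

// Minimal stand-in for ON_BLOCK_EXIT: runs its callable on scope exit.
template <typename F>
class ScopeGuard {
public:
    explicit ScopeGuard(F fn) : _fn(std::move(fn)) {}
    ~ScopeGuard() { _fn(); }

private:
    F _fn;
};

void processDiff(std::vector<int>* columnKeys) {
    // As in the hunk above: guarantee the accumulated keys are cleared on
    // every invocation, no matter which branch returns.
    ScopeGuard guard([&] { columnKeys->clear(); });
    columnKeys->push_back(42);
    std::cout << "keys while in scope: " << columnKeys->size() << '\n';
}  // guard's destructor runs here and clears columnKeys

int main() {
    std::vector<int> keys;
    processDiff(&keys);
    std::cout << "keys after scope exit: " << keys.size() << '\n';  // prints 0
    return 0;
}
```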
diff --git a/src/mongo/db/index/columns_access_method.h b/src/mongo/db/index/columns_access_method.h
index 8d67306fdd7..cbb195c52fa 100644
--- a/src/mongo/db/index/columns_access_method.h
+++ b/src/mongo/db/index/columns_access_method.h
@@ -48,6 +48,10 @@ public:
ColumnStoreAccessMethod(IndexCatalogEntry* ice, std::unique_ptr<ColumnStore>);
+ const column_keygen::ColumnKeyGenerator& getKeyGen() const {
+ return _keyGen;
+ }
+
/**
* Returns a pointer to the ColumnstoreProjection owned by the underlying ColumnKeyGenerator.
*/
@@ -123,8 +127,16 @@ public:
return _store.get();
}
+ ColumnStore* writableStorage() const {
+ return _store.get();
+ }
+
class BulkBuilder;
+ const std::string& indexName() const {
+ return _descriptor->indexName();
+ }
+
/**
* Returns true iff 'compressor' is a recognized name of a block compression module that is
* supported for use with the column store index.
diff --git a/src/mongo/db/storage/column_store.h b/src/mongo/db/storage/column_store.h
index 3886054a022..d22b8386dc6 100644
--- a/src/mongo/db/storage/column_store.h
+++ b/src/mongo/db/storage/column_store.h
@@ -53,6 +53,18 @@ struct FullCellView {
CellView value;
};
+/**
+ * An owned representation of a column-store index entry cell.
+ */
+struct FullCellValue {
+ PathValue path;
+ RowId rid;
+ CellValue value;
+
+ FullCellValue(FullCellView fcv) : path(fcv.path), rid(fcv.rid), value(fcv.value) {}
+ FullCellValue(PathView p, RowId r, CellView v) : path(p), rid(r), value(v) {}
+};
+
struct CellViewForPath {
RowId rid;
CellView value;
@@ -66,10 +78,8 @@ struct CellViewForPath {
* stores tuples of Path, RecordId and Value in a separate format.
*/
class ColumnStore {
-protected:
- class Cursor;
-
public:
+ class Cursor;
class WriteCursor {
public:
virtual ~WriteCursor() = default;
@@ -376,7 +386,10 @@ public:
_ident = std::move(newIdent);
}
-protected:
+ /**
+ * The Cursor class is for raw access to the index. Except for unusual use cases (e.g., index
+     * validation), you'll want to use CursorForPath instead.
+ */
class Cursor {
public:
virtual ~Cursor() = default;
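
FullCellValue gives validation an owned counterpart to FullCellView, whose path and value members only borrow from buffers that a cursor may invalidate on its next advance. A sketch of the same view-to-owned pattern using standard-library stand-ins (the *Like types below are hypothetical; CellView and PathView behave like std::string_view here):

```cpp
#include <iostream>
#include <string>
#include <string_view>

// Borrowing form: cheap, but only valid while the underlying buffer lives.
struct CellViewLike {
    std::string_view path;
    long long rid;
    std::string_view value;
};

// Owning form: deep-copies on construction, safe to keep across cursor advances.
struct CellValueLike {
    std::string path;
    long long rid;
    std::string value;

    explicit CellValueLike(const CellViewLike& v)
        : path(v.path), rid(v.rid), value(v.value) {}
};

int main() {
    std::string buffer = "a.b.c";
    CellViewLike view{buffer, 7, "cell-bytes"};
    CellValueLike owned(view);  // copy out before the buffer is reused
    buffer = "overwritten";     // invalidates view.path, but not owned.path
    std::cout << owned.path << " rid=" << owned.rid << '\n';
    return 0;
}
```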
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_column_store.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_column_store.cpp
index aaaf5c27041..263d0bcbf67 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_column_store.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_column_store.cpp
@@ -270,8 +270,11 @@ public:
if (!_eof && !_lastMoveSkippedKey) {
WT_ITEM key;
WT_CURSOR* c = _cursor->get();
- c->get_key(c, &key);
- _buffer.assign(static_cast<const char*>(key.data), key.size);
+ if (c->get_key(c, &key) == 0) {
+ _buffer.assign(static_cast<const char*>(key.data), key.size);
+ } else {
+ _buffer.clear();
+ }
}
resetCursor();
}
@@ -321,6 +324,10 @@ private:
WT_CURSOR* c = _cursor->get();
+ if (searchKey.empty()) {
+ return false;
+ }
+
const WiredTigerItem searchKeyItem(searchKey);
c->set_key(c, searchKeyItem.Get());
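
The WiredTiger changes make cursor save/restore defensive: the saved key is cached only when get_key() reports success (WT_CURSOR::get_key returns 0 on success), and an empty buffer makes restore skip repositioning. A self-contained sketch of that handshake, with a plain callable standing in for the WiredTiger call:

```cpp
#include <functional>
#include <iostream>
#include <string>

// 'getKey' models WT_CURSOR::get_key, which returns 0 on success.
void saveCursorKey(const std::function<int(std::string*)>& getKey, std::string& buffer) {
    std::string key;
    if (getKey(&key) == 0) {
        buffer = key;    // position captured; restore may seek back to it
    } else {
        buffer.clear();  // no valid key; an empty buffer tells restore to skip seeking
    }
}

bool restoreCursorKey(const std::string& buffer) {
    if (buffer.empty()) {
        return false;  // mirrors the new early return on an empty searchKey
    }
    // ... set_key / search with 'buffer' would go here ...
    return true;
}

int main() {
    std::string buffer;
    saveCursorKey([](std::string* out) { *out = "saved-key"; return 0; }, buffer);
    std::cout << std::boolalpha << restoreCursorKey(buffer) << '\n';  // true
    saveCursorKey([](std::string*) { return -1; }, buffer);           // failure path
    std::cout << std::boolalpha << restoreCursorKey(buffer) << '\n';  // false
    return 0;
}
```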
diff --git a/src/mongo/dbtests/storage_debug_util.cpp b/src/mongo/dbtests/storage_debug_util.cpp
index 9cf0b3c640a..89f5ca48582 100644
--- a/src/mongo/dbtests/storage_debug_util.cpp
+++ b/src/mongo/dbtests/storage_debug_util.cpp
@@ -72,7 +72,7 @@ void printCollectionAndIndexTableEntries(OperationContext* opCtx, const Namespac
const auto indexDescriptor = indexCatalogEntry->descriptor();
const auto iam = indexCatalogEntry->accessMethod()->asSortedData();
if (!iam) {
- LOGV2(6325100,
+ LOGV2(6325101,
"[Debugging] skipping index {index_name} because it isn't SortedData",
"index_name"_attr = indexDescriptor->indexName());
continue;