summaryrefslogtreecommitdiff
path: root/src/mongo/db
diff options
context:
space:
mode:
authorShin Yee Tan <shinyee.tan@mongodb.com>2022-01-28 18:39:25 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-01-28 19:25:21 +0000
commitcf244b1424284ee337406849c791029370d28234 (patch)
tree3c70265b5336b28071005211d438293bedf9d11f /src/mongo/db
parent392ad3ef3d5a8b9a8dbd83ffe1d9a4f780e6fa61 (diff)
downloadmongo-cf244b1424284ee337406849c791029370d28234.tar.gz
SERVER-50081 Remove duplicate documents of unique indexes in repair mode
Diffstat (limited to 'src/mongo/db')
-rw-r--r--src/mongo/db/catalog/SConscript15
-rw-r--r--src/mongo/db/catalog/collection.h9
-rw-r--r--src/mongo/db/catalog/collection_impl.cpp12
-rw-r--r--src/mongo/db/catalog/collection_impl.h9
-rw-r--r--src/mongo/db/catalog/collection_mock.h6
-rw-r--r--src/mongo/db/catalog/index_builds_manager.cpp74
-rw-r--r--src/mongo/db/catalog/index_builds_manager.h10
-rw-r--r--src/mongo/db/catalog/index_catalog.h11
-rw-r--r--src/mongo/db/catalog/index_catalog_impl.cpp28
-rw-r--r--src/mongo/db/catalog/index_catalog_impl.h9
-rw-r--r--src/mongo/db/catalog/index_consistency.cpp65
-rw-r--r--src/mongo/db/catalog/index_repair.cpp181
-rw-r--r--src/mongo/db/catalog/index_repair.h63
-rw-r--r--src/mongo/db/catalog/validate_adaptor.cpp4
-rw-r--r--src/mongo/db/catalog/validate_results.cpp2
-rw-r--r--src/mongo/db/catalog/validate_results.h3
16 files changed, 354 insertions, 147 deletions
diff --git a/src/mongo/db/catalog/SConscript b/src/mongo/db/catalog/SConscript
index 94376152931..5111b44c000 100644
--- a/src/mongo/db/catalog/SConscript
+++ b/src/mongo/db/catalog/SConscript
@@ -112,12 +112,26 @@ env.Library(
)
env.Library(
+ target='index_repair',
+ source=[
+ 'index_repair.cpp',
+ ],
+ LIBDEPS_PRIVATE=[
+ '$BUILD_DIR/mongo/db/catalog_raii',
+ '$BUILD_DIR/mongo/db/concurrency/write_conflict_exception',
+ '$BUILD_DIR/mongo/db/index/index_access_method',
+ 'validate_state',
+ ]
+)
+
+env.Library(
target='index_builds_manager',
source=[
'index_builds_manager.cpp',
],
LIBDEPS_PRIVATE=[
'$BUILD_DIR/mongo/base',
+ '$BUILD_DIR/mongo/db/catalog/index_repair',
'$BUILD_DIR/mongo/db/catalog_raii',
'$BUILD_DIR/mongo/db/concurrency/write_conflict_exception',
'$BUILD_DIR/mongo/db/storage/storage_repair_observer',
@@ -344,6 +358,7 @@ env.Library(
'$BUILD_DIR/mongo/base',
'$BUILD_DIR/mongo/db/catalog/collection_catalog',
'$BUILD_DIR/mongo/db/catalog/collection_catalog_helper',
+ '$BUILD_DIR/mongo/db/catalog/index_repair',
'$BUILD_DIR/mongo/db/collection_index_usage_tracker',
'$BUILD_DIR/mongo/db/commands/server_status_core',
'$BUILD_DIR/mongo/db/concurrency/lock_manager',
diff --git a/src/mongo/db/catalog/collection.h b/src/mongo/db/catalog/collection.h
index 8446a8ff438..5ac6f274f3e 100644
--- a/src/mongo/db/catalog/collection.h
+++ b/src/mongo/db/catalog/collection.h
@@ -363,7 +363,8 @@ public:
OpDebug* opDebug,
bool fromMigrate = false,
bool noWarn = false,
- StoreDeletedDoc storeDeletedDoc = StoreDeletedDoc::Off) const = 0;
+ StoreDeletedDoc storeDeletedDoc = StoreDeletedDoc::Off,
+ CheckRecordId checkRecordId = CheckRecordId::Off) const = 0;
/**
* Deletes the document from the collection.
@@ -376,6 +377,9 @@ public:
* 'opDebug' Optional argument. When not null, will be used to record operation statistics.
* 'noWarn' if unindexing the record causes an error, if noWarn is true the error
* will not be logged.
+ * 'storeDeletedDoc' whether to store the document deleted in the oplog.
+ * 'checkRecordId' whether to confirm the recordId matches the record we are removing when
+ * unindexing.
*/
virtual void deleteDocument(OperationContext* opCtx,
Snapshotted<BSONObj> doc,
@@ -384,7 +388,8 @@ public:
OpDebug* opDebug,
bool fromMigrate = false,
bool noWarn = false,
- StoreDeletedDoc storeDeletedDoc = StoreDeletedDoc::Off) const = 0;
+ StoreDeletedDoc storeDeletedDoc = StoreDeletedDoc::Off,
+ CheckRecordId checkRecordId = CheckRecordId::Off) const = 0;
/*
* Inserts all documents inside one WUOW.
diff --git a/src/mongo/db/catalog/collection_impl.cpp b/src/mongo/db/catalog/collection_impl.cpp
index d5e8917b383..00efbf86ec1 100644
--- a/src/mongo/db/catalog/collection_impl.cpp
+++ b/src/mongo/db/catalog/collection_impl.cpp
@@ -1170,9 +1170,11 @@ void CollectionImpl::deleteDocument(OperationContext* opCtx,
OpDebug* opDebug,
bool fromMigrate,
bool noWarn,
- Collection::StoreDeletedDoc storeDeletedDoc) const {
+ Collection::StoreDeletedDoc storeDeletedDoc,
+ CheckRecordId checkRecordId) const {
Snapshotted<BSONObj> doc = docFor(opCtx, loc);
- deleteDocument(opCtx, doc, stmtId, loc, opDebug, fromMigrate, noWarn, storeDeletedDoc);
+ deleteDocument(
+ opCtx, doc, stmtId, loc, opDebug, fromMigrate, noWarn, storeDeletedDoc, checkRecordId);
}
void CollectionImpl::deleteDocument(OperationContext* opCtx,
@@ -1182,7 +1184,8 @@ void CollectionImpl::deleteDocument(OperationContext* opCtx,
OpDebug* opDebug,
bool fromMigrate,
bool noWarn,
- Collection::StoreDeletedDoc storeDeletedDoc) const {
+ Collection::StoreDeletedDoc storeDeletedDoc,
+ CheckRecordId checkRecordId) const {
if (isCapped() && !isClustered() && opCtx->isEnforcingConstraints()) {
// System operations such as tenant migration, secondary batch application or TTL on a
// capped clustered collection can delete from capped collections.
@@ -1224,7 +1227,8 @@ void CollectionImpl::deleteDocument(OperationContext* opCtx,
doc.value(),
loc,
noWarn,
- &keysDeleted);
+ &keysDeleted,
+ checkRecordId);
_shared->_recordStore->deleteRecord(opCtx, loc);
if (deletedDoc) {
deleteArgs.deletedDoc = &(deletedDoc.get());
diff --git a/src/mongo/db/catalog/collection_impl.h b/src/mongo/db/catalog/collection_impl.h
index 9e9046c6ac5..30342ef3730 100644
--- a/src/mongo/db/catalog/collection_impl.h
+++ b/src/mongo/db/catalog/collection_impl.h
@@ -151,7 +151,8 @@ public:
OpDebug* opDebug,
bool fromMigrate = false,
bool noWarn = false,
- Collection::StoreDeletedDoc storeDeletedDoc = Collection::StoreDeletedDoc::Off) const final;
+ Collection::StoreDeletedDoc storeDeletedDoc = Collection::StoreDeletedDoc::Off,
+ CheckRecordId checkRecordId = CheckRecordId::Off) const final;
/**
* Deletes the document from the collection.
@@ -164,10 +165,11 @@ public:
* real delete.
* 'loc' key to uniquely identify a record in a collection.
* 'opDebug' Optional argument. When not null, will be used to record operation statistics.
- * 'cappedOK' if true, allows deletes on capped collections (Cloner::copyDB uses this).
* 'noWarn' if unindexing the record causes an error, if noWarn is true the error
* will not be logged.
* 'storeDeletedDoc' whether to store the document deleted in the oplog.
+ * 'checkRecordId' whether to confirm the recordId matches the record we are removing when
+ * unindexing.
*/
void deleteDocument(
OperationContext* opCtx,
@@ -177,7 +179,8 @@ public:
OpDebug* opDebug,
bool fromMigrate = false,
bool noWarn = false,
- Collection::StoreDeletedDoc storeDeletedDoc = Collection::StoreDeletedDoc::Off) const final;
+ Collection::StoreDeletedDoc storeDeletedDoc = Collection::StoreDeletedDoc::Off,
+ CheckRecordId checkRecordId = CheckRecordId::Off) const final;
/*
* Inserts all documents inside one WUOW.
diff --git a/src/mongo/db/catalog/collection_mock.h b/src/mongo/db/catalog/collection_mock.h
index 894c902329f..7f4961cc306 100644
--- a/src/mongo/db/catalog/collection_mock.h
+++ b/src/mongo/db/catalog/collection_mock.h
@@ -126,7 +126,8 @@ public:
OpDebug* opDebug,
bool fromMigrate,
bool noWarn,
- Collection::StoreDeletedDoc storeDeletedDoc) const {
+ Collection::StoreDeletedDoc storeDeletedDoc,
+ CheckRecordId checkRecordId) const {
std::abort();
}
@@ -138,7 +139,8 @@ public:
OpDebug* opDebug,
bool fromMigrate = false,
bool noWarn = false,
- Collection::StoreDeletedDoc storeDeletedDoc = Collection::StoreDeletedDoc::Off) const {
+ Collection::StoreDeletedDoc storeDeletedDoc = Collection::StoreDeletedDoc::Off,
+ CheckRecordId checkRecordId = CheckRecordId::Off) const {
std::abort();
}
diff --git a/src/mongo/db/catalog/index_builds_manager.cpp b/src/mongo/db/catalog/index_builds_manager.cpp
index 155a411aec2..4179dafb5b1 100644
--- a/src/mongo/db/catalog/index_builds_manager.cpp
+++ b/src/mongo/db/catalog/index_builds_manager.cpp
@@ -36,6 +36,7 @@
#include "mongo/db/catalog/collection.h"
#include "mongo/db/catalog/collection_catalog.h"
#include "mongo/db/catalog/index_catalog.h"
+#include "mongo/db/catalog/index_repair.h"
#include "mongo/db/catalog/uncommitted_collections.h"
#include "mongo/db/catalog_raii.h"
#include "mongo/db/concurrency/write_conflict_exception.h"
@@ -243,7 +244,8 @@ StatusWith<std::pair<long long, long long>> IndexBuildsManager::startBuildingInd
Status status = [&] {
if (repair == RepairData::kYes) {
return builder->dumpInsertsFromBulk(opCtx, coll, [&](const RecordId& rid) {
- auto moveStatus = _moveRecordToLostAndFound(opCtx, ns, lostAndFoundNss, rid);
+ auto moveStatus =
+ mongo::index_repair::moveRecordToLostAndFound(opCtx, ns, lostAndFoundNss, rid);
if (moveStatus.isOK() && (moveStatus.getValue() > 0)) {
recordsRemoved++;
bytesRemoved += moveStatus.getValue();
@@ -398,74 +400,4 @@ StatusWith<MultiIndexBlock*> IndexBuildsManager::_getBuilder(const UUID& buildUU
}
return builderIt->second.get();
}
-
-StatusWith<int> IndexBuildsManager::_moveRecordToLostAndFound(
- OperationContext* opCtx,
- const NamespaceString& nss,
- const NamespaceString& lostAndFoundNss,
- RecordId dupRecord) {
- invariant(opCtx->lockState()->isCollectionLockedForMode(nss, MODE_IX), nss.ns());
- invariant(opCtx->lockState()->isCollectionLockedForMode(lostAndFoundNss, MODE_IX),
- lostAndFoundNss.ns());
-
- auto catalog = CollectionCatalog::get(opCtx);
- auto originalCollection = catalog->lookupCollectionByNamespace(opCtx, nss);
- CollectionPtr localCollection = catalog->lookupCollectionByNamespace(opCtx, lostAndFoundNss);
-
- // Create the collection if it doesn't exist.
- if (!localCollection) {
- Status status =
- writeConflictRetry(opCtx, "createLostAndFoundCollection", lostAndFoundNss.ns(), [&]() {
- AutoGetCollection autoColl(opCtx, lostAndFoundNss, MODE_IX);
-
- // Ensure the database exists.
- auto db = autoColl.ensureDbExists(opCtx);
- invariant(db, lostAndFoundNss.ns());
-
- WriteUnitOfWork wuow(opCtx);
-
- // Since we are potentially deleting a document with duplicate _id values, we need
- // to be able to insert into the lost and found collection without generating any
- // duplicate key errors on the _id value.
- CollectionOptions collOptions;
- collOptions.setNoIdIndex();
- localCollection = db->createCollection(opCtx, lostAndFoundNss, collOptions);
-
- // Ensure the collection exists.
- invariant(localCollection, lostAndFoundNss.ns());
-
- wuow.commit();
- return Status::OK();
- });
- if (!status.isOK()) {
- return status;
- }
- }
-
- return writeConflictRetry(
- opCtx, "writeDupDocToLostAndFoundCollection", nss.ns(), [&]() -> StatusWith<int> {
- WriteUnitOfWork wuow(opCtx);
- Snapshotted<BSONObj> doc;
- int docSize = 0;
-
- if (!originalCollection->findDoc(opCtx, dupRecord, &doc)) {
- return docSize;
- } else {
- docSize = doc.value().objsize();
- }
-
- // Write document to lost_and_found collection and delete from original collection.
- Status status =
- localCollection->insertDocument(opCtx, InsertStatement(doc.value()), nullptr);
- if (!status.isOK()) {
- return status;
- }
-
- originalCollection->deleteDocument(opCtx, kUninitializedStmtId, dupRecord, nullptr);
-
- wuow.commit();
- return docSize;
- });
-}
-
} // namespace mongo
diff --git a/src/mongo/db/catalog/index_builds_manager.h b/src/mongo/db/catalog/index_builds_manager.h
index 2f2a4d6663a..b80ce85697d 100644
--- a/src/mongo/db/catalog/index_builds_manager.h
+++ b/src/mongo/db/catalog/index_builds_manager.h
@@ -207,16 +207,6 @@ private:
// Map of index builders by build UUID. Allows access to the builders so that actions can be
// taken on and information passed to and from index builds.
std::map<UUID, std::unique_ptr<MultiIndexBlock>> _builders;
-
- /**
- * Deletes record containing duplicate keys and insert it into a local lost and found collection
- * titled "local.lost_and_found.<original collection UUID>". Returns the size of the
- * record removed.
- */
- StatusWith<int> _moveRecordToLostAndFound(OperationContext* opCtx,
- const NamespaceString& ns,
- const NamespaceString& lostAndFoundNss,
- RecordId dupRecord);
};
} // namespace mongo
diff --git a/src/mongo/db/catalog/index_catalog.h b/src/mongo/db/catalog/index_catalog.h
index 88773b33266..2f922b0bcc3 100644
--- a/src/mongo/db/catalog/index_catalog.h
+++ b/src/mongo/db/catalog/index_catalog.h
@@ -57,6 +57,14 @@ struct BsonRecord {
const BSONObj* docPtr;
};
+/**
+ * CheckRecordId indicates whether to confirm that the recordId matches the element we are
+ * removing when unindexing. When deleting documents, it is set to 'Off' by default to allow
+ * WiredTiger to do blind unindexing for efficiency. When set to 'On', disables blind deletes and
+ * forces recordid-matching for unindex operations.
+ */
+enum class CheckRecordId { Off, On };
+
enum class IndexBuildMethod {
/**
* Use a collection scan to dump all keys into an external sorter. During this process,
@@ -507,7 +515,8 @@ public:
const BSONObj& obj,
const RecordId& loc,
bool noWarn,
- int64_t* keysDeletedOut) const = 0;
+ int64_t* keysDeletedOut,
+ CheckRecordId checkRecordId = CheckRecordId::Off) const = 0;
/*
* Attempt compaction on all ready indexes to regain disk space, if the storage engine's index
diff --git a/src/mongo/db/catalog/index_catalog_impl.cpp b/src/mongo/db/catalog/index_catalog_impl.cpp
index e3463a5b033..e8fd618f1b8 100644
--- a/src/mongo/db/catalog/index_catalog_impl.cpp
+++ b/src/mongo/db/catalog/index_catalog_impl.cpp
@@ -1652,7 +1652,8 @@ void IndexCatalogImpl::_unindexKeys(OperationContext* opCtx,
const BSONObj& obj,
RecordId loc,
bool logIfError,
- int64_t* const keysDeletedOut) const {
+ int64_t* const keysDeletedOut,
+ CheckRecordId checkRecordId) const {
InsertDeleteOptions options;
prepareInsertDeleteOptions(opCtx, collection->ns(), index->descriptor(), &options);
options.logIfError = logIfError;
@@ -1683,10 +1684,12 @@ void IndexCatalogImpl::_unindexKeys(OperationContext* opCtx,
// are allowed in unique indexes, WiredTiger does not do blind unindexing, and instead confirms
// that the recordid matches the element we are removing.
//
- // We need to disable blind-deletes for in-progress indexes, in order to force recordid-matching
- // for unindex operations, since initial sync can build an index over a collection with
- // duplicates. See SERVER-17487 for more details.
- options.dupsAllowed = options.dupsAllowed || !index->isReady(opCtx, collection);
+ // We need to disable blind-deletes if 'checkRecordId' is explicitly set to 'On', or for
+ // in-progress indexes, in order to force recordid-matching for unindex operations, since
+ // initial sync can build an index over a collection with duplicates. See SERVER-17487 for more
+ // details.
+ options.dupsAllowed = options.dupsAllowed || !index->isReady(opCtx, collection) ||
+ (checkRecordId == CheckRecordId::On);
int64_t removed = 0;
Status status = index->accessMethod()->removeKeys(opCtx, keys, loc, options, &removed);
@@ -1711,7 +1714,8 @@ void IndexCatalogImpl::_unindexRecord(OperationContext* opCtx,
const BSONObj& obj,
const RecordId& loc,
bool logIfError,
- int64_t* keysDeletedOut) const {
+ int64_t* keysDeletedOut,
+ CheckRecordId checkRecordId) const {
SharedBufferFragmentBuilder pooledBuilder(KeyString::HeapBuilder::kHeapAllocatorDefaultBytes);
auto& executionCtx = StorageExecutionContext::get(opCtx);
@@ -1739,7 +1743,8 @@ void IndexCatalogImpl::_unindexRecord(OperationContext* opCtx,
return;
}
}
- _unindexKeys(opCtx, collection, entry, *keys, obj, loc, logIfError, keysDeletedOut);
+ _unindexKeys(
+ opCtx, collection, entry, *keys, obj, loc, logIfError, keysDeletedOut, checkRecordId);
}
Status IndexCatalogImpl::indexRecords(OperationContext* opCtx,
@@ -1804,7 +1809,8 @@ void IndexCatalogImpl::unindexRecord(OperationContext* opCtx,
const BSONObj& obj,
const RecordId& loc,
bool noWarn,
- int64_t* keysDeletedOut) const {
+ int64_t* keysDeletedOut,
+ CheckRecordId checkRecordId) const {
if (keysDeletedOut) {
*keysDeletedOut = 0;
}
@@ -1815,7 +1821,8 @@ void IndexCatalogImpl::unindexRecord(OperationContext* opCtx,
IndexCatalogEntry* entry = it->get();
bool logIfError = !noWarn;
- _unindexRecord(opCtx, collection, entry, obj, loc, logIfError, keysDeletedOut);
+ _unindexRecord(
+ opCtx, collection, entry, obj, loc, logIfError, keysDeletedOut, checkRecordId);
}
for (IndexCatalogEntryContainer::const_iterator it = _buildingIndexes.begin();
@@ -1825,7 +1832,8 @@ void IndexCatalogImpl::unindexRecord(OperationContext* opCtx,
// If it's a background index, we DO NOT want to log anything.
bool logIfError = entry->isReady(opCtx, collection) ? !noWarn : false;
- _unindexRecord(opCtx, collection, entry, obj, loc, logIfError, keysDeletedOut);
+ _unindexRecord(
+ opCtx, collection, entry, obj, loc, logIfError, keysDeletedOut, checkRecordId);
}
}
diff --git a/src/mongo/db/catalog/index_catalog_impl.h b/src/mongo/db/catalog/index_catalog_impl.h
index ec15da563b8..e1579f023ef 100644
--- a/src/mongo/db/catalog/index_catalog_impl.h
+++ b/src/mongo/db/catalog/index_catalog_impl.h
@@ -283,7 +283,8 @@ public:
const BSONObj& obj,
const RecordId& loc,
bool noWarn,
- int64_t* keysDeletedOut) const override;
+ int64_t* keysDeletedOut,
+ CheckRecordId checkRecordId = CheckRecordId::Off) const override;
Status compactIndexes(OperationContext* opCtx) const override;
@@ -367,7 +368,8 @@ private:
const BSONObj& obj,
RecordId loc,
bool logIfError,
- int64_t* keysDeletedOut) const;
+ int64_t* keysDeletedOut,
+ CheckRecordId checkRecordId = CheckRecordId::Off) const;
void _unindexRecord(OperationContext* opCtx,
const CollectionPtr& collection,
@@ -375,7 +377,8 @@ private:
const BSONObj& obj,
const RecordId& loc,
bool logIfError,
- int64_t* keysDeletedOut) const;
+ int64_t* keysDeletedOut,
+ CheckRecordId checkRecordId = CheckRecordId::Off) const;
/**
* Helper to remove the index from disk.
diff --git a/src/mongo/db/catalog/index_consistency.cpp b/src/mongo/db/catalog/index_consistency.cpp
index ea4880b35f7..c5c0a2d1ce7 100644
--- a/src/mongo/db/catalog/index_consistency.cpp
+++ b/src/mongo/db/catalog/index_consistency.cpp
@@ -36,6 +36,7 @@
#include "mongo/db/catalog/index_consistency.h"
#include "mongo/db/catalog/index_catalog.h"
+#include "mongo/db/catalog/index_repair.h"
#include "mongo/db/catalog/validate_gen.h"
#include "mongo/db/concurrency/write_conflict_exception.h"
#include "mongo/db/db_raii.h"
@@ -146,19 +147,10 @@ void IndexConsistency::repairMissingIndexEntries(OperationContext* opCtx,
invariant(_validateState->getIndexes().size() > 0);
std::shared_ptr<const IndexCatalogEntry> index = _validateState->getIndexes().front();
for (auto it = _missingIndexEntries.begin(); it != _missingIndexEntries.end();) {
- const IndexKey& key = it->first;
const KeyString::Value& ks = it->second.keyString;
const KeyFormat keyFormat = _validateState->getCollection()->getRecordStore()->keyFormat();
- RecordId rid;
- if (keyFormat == KeyFormat::Long) {
- rid = KeyString::decodeRecordIdLongAtEnd(ks.getBuffer(), ks.getSize());
- } else {
- invariant(keyFormat == KeyFormat::String);
- rid = KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize());
- }
-
- const std::string& indexName = key.first;
+ const std::string& indexName = it->first.first;
if (indexName != index->descriptor()->indexName()) {
// Assuming that _missingIndexEntries is sorted by indexName, this lookup should not
// happen often.
@@ -169,37 +161,16 @@ void IndexConsistency::repairMissingIndexEntries(OperationContext* opCtx,
}
}
}
- IndexAccessMethod* accessMethod = const_cast<IndexAccessMethod*>(index->accessMethod());
- InsertDeleteOptions options;
- options.dupsAllowed = !index->descriptor()->unique();
- int64_t numInserted = 0;
- writeConflictRetry(opCtx, "insertingMissingIndexEntries", _validateState->nss().ns(), [&] {
- WriteUnitOfWork wunit(opCtx);
- Status status =
- accessMethod->insertKeysAndUpdateMultikeyPaths(opCtx,
- _validateState->getCollection(),
- {ks},
- {},
- {},
- rid,
- options,
- nullptr,
- &numInserted);
- wunit.commit();
- });
-
- // InsertKeys may fail in the scenario where there are missing index entries for duplicate
- // documents.
- if (numInserted > 0) {
- auto& indexResults = results->indexResultsMap[indexName];
- indexResults.keysTraversed += numInserted;
- results->numInsertedMissingIndexEntries += numInserted;
- results->repaired = true;
- getIndexInfo(indexName).numKeys += numInserted;
- it = _missingIndexEntries.erase(it);
- } else {
- ++it;
- }
+
+ int64_t numInserted = index_repair::repairMissingIndexEntry(opCtx,
+ index,
+ ks,
+ keyFormat,
+ _validateState->nss(),
+ _validateState->getCollection(),
+ results);
+ getIndexInfo(indexName).numKeys += numInserted;
+ it = _missingIndexEntries.erase(it);
}
if (results->numInsertedMissingIndexEntries > 0) {
@@ -207,6 +178,18 @@ void IndexConsistency::repairMissingIndexEntries(OperationContext* opCtx,
<< "Inserted " << results->numInsertedMissingIndexEntries
<< " missing index entries.");
}
+ if (results->numDocumentsMovedToLostAndFound > 0) {
+ const NamespaceString lostAndFoundNss =
+ NamespaceString(NamespaceString::kLocalDb,
+ "lost_and_found." + _validateState->getCollection()->uuid().toString());
+ results->warnings.push_back(str::stream()
+ << "Removed " << results->numDocumentsMovedToLostAndFound
+ << " duplicate documents to resolve "
+ << results->numDocumentsMovedToLostAndFound +
+ results->numOutdatedMissingIndexEntry
+ << " missing index entries. Removed documents can be found in '"
+ << lostAndFoundNss.ns() << "'.");
+ }
}
void IndexConsistency::addIndexEntryErrors(ValidateResults* results) {
diff --git a/src/mongo/db/catalog/index_repair.cpp b/src/mongo/db/catalog/index_repair.cpp
new file mode 100644
index 00000000000..a528d682c0e
--- /dev/null
+++ b/src/mongo/db/catalog/index_repair.cpp
@@ -0,0 +1,181 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/catalog/index_repair.h"
+#include "mongo/base/status_with.h"
+#include "mongo/db/catalog/validate_state.h"
+#include "mongo/db/catalog_raii.h"
+#include "mongo/db/concurrency/write_conflict_exception.h"
+#include "mongo/db/index/index_access_method.h"
+#include "mongo/logv2/log_debug.h"
+
+namespace mongo {
+namespace index_repair {
+
+StatusWith<int> moveRecordToLostAndFound(OperationContext* opCtx,
+ const NamespaceString& nss,
+ const NamespaceString& lostAndFoundNss,
+ RecordId dupRecord) {
+ AutoGetCollection autoColl(opCtx, lostAndFoundNss, MODE_IX);
+ auto catalog = CollectionCatalog::get(opCtx);
+ auto originalCollection = catalog->lookupCollectionByNamespace(opCtx, nss);
+ CollectionPtr localCollection = catalog->lookupCollectionByNamespace(opCtx, lostAndFoundNss);
+
+ // Creates the collection if it doesn't exist.
+ if (!localCollection) {
+ Status status =
+ writeConflictRetry(opCtx, "createLostAndFoundCollection", lostAndFoundNss.ns(), [&]() {
+ // Ensure the database exists.
+ auto db = autoColl.ensureDbExists(opCtx);
+ invariant(db, lostAndFoundNss.ns());
+
+ WriteUnitOfWork wuow(opCtx);
+
+ // Since we are potentially deleting a document with duplicate _id values, we need
+ // to be able to insert into the lost and found collection without generating any
+ // duplicate key errors on the _id value.
+ CollectionOptions collOptions;
+ collOptions.setNoIdIndex();
+ localCollection = db->createCollection(opCtx, lostAndFoundNss, collOptions);
+
+ // Ensure the collection exists.
+ invariant(localCollection, lostAndFoundNss.ns());
+
+ wuow.commit();
+ return Status::OK();
+ });
+ if (!status.isOK()) {
+ return status;
+ }
+ }
+
+ return writeConflictRetry(
+ opCtx, "writeDupDocToLostAndFoundCollection", nss.ns(), [&]() -> StatusWith<int> {
+ WriteUnitOfWork wuow(opCtx);
+ Snapshotted<BSONObj> doc;
+ int docSize = 0;
+
+ if (!originalCollection->findDoc(opCtx, dupRecord, &doc)) {
+ return docSize;
+ } else {
+ docSize = doc.value().objsize();
+ }
+
+ // Write document to lost_and_found collection and delete from original collection.
+ Status status =
+ localCollection->insertDocument(opCtx, InsertStatement(doc.value()), nullptr);
+ if (!status.isOK()) {
+ return status;
+ }
+
+ // CheckRecordId set to 'On' because we need _unindexKeys to confirm the record id of
+ // this document matches the record id of the element it tries to unindex. This avoids
+ // wrongly unindexing a document with the same _id.
+ originalCollection->deleteDocument(opCtx,
+ kUninitializedStmtId,
+ dupRecord,
+ nullptr /* opDebug */,
+ false /* fromMigrate */,
+ false /* noWarn */,
+ Collection::StoreDeletedDoc::Off,
+ CheckRecordId::On);
+
+ wuow.commit();
+ return docSize;
+ });
+}
+
+int repairMissingIndexEntry(OperationContext* opCtx,
+ std::shared_ptr<const IndexCatalogEntry>& index,
+ const KeyString::Value& ks,
+ const KeyFormat& keyFormat,
+ const NamespaceString& nss,
+ const CollectionPtr& coll,
+ ValidateResults* results) {
+ RecordId rid;
+ if (keyFormat == KeyFormat::Long) {
+ rid = KeyString::decodeRecordIdLongAtEnd(ks.getBuffer(), ks.getSize());
+ } else {
+ invariant(keyFormat == KeyFormat::String);
+ rid = KeyString::decodeRecordIdStrAtEnd(ks.getBuffer(), ks.getSize());
+ }
+
+ IndexAccessMethod* accessMethod = const_cast<IndexAccessMethod*>(index->accessMethod());
+ InsertDeleteOptions options;
+ options.dupsAllowed = !index->descriptor()->unique();
+ int64_t numInserted = 0;
+
+ writeConflictRetry(opCtx, "insertingMissingIndexEntries", nss.ns(), [&] {
+ WriteUnitOfWork wunit(opCtx);
+ // Ignore return status because we will use numInserted to verify success.
+ accessMethod
+ ->insertKeysAndUpdateMultikeyPaths(
+ opCtx, coll, {ks}, {}, {}, rid, options, nullptr, &numInserted)
+ .ignore();
+ wunit.commit();
+ });
+
+ const std::string& indexName = index->descriptor()->indexName();
+
+ // The insertKeysAndUpdateMultikeyPaths() may fail when there are missing index entries for
+ // duplicate documents.
+ if (numInserted > 0) {
+ auto& indexResults = results->indexResultsMap[indexName];
+ indexResults.keysTraversed += numInserted;
+ results->numInsertedMissingIndexEntries += numInserted;
+ results->repaired = true;
+ } else {
+ // Move the duplicate document of the missing index entry from the record store to the lost
+ // and found.
+ Snapshotted<BSONObj> doc;
+ if (coll->findDoc(opCtx, rid, &doc)) {
+ const NamespaceString lostAndFoundNss = NamespaceString(
+ NamespaceString::kLocalDb, "lost_and_found." + coll->uuid().toString());
+
+ auto moveStatus = moveRecordToLostAndFound(opCtx, nss, lostAndFoundNss, rid);
+ if (moveStatus.isOK() && (moveStatus.getValue() > 0)) {
+ auto& indexResults = results->indexResultsMap[indexName];
+ indexResults.keysRemovedFromRecordStore++;
+ results->numDocumentsMovedToLostAndFound++;
+ results->repaired = true;
+ } else {
+ results->errors.push_back(str::stream() << "unable to move record " << rid << " to "
+ << lostAndFoundNss.ns());
+ }
+ } else {
+ // If the missing index entry does not exist in the record store, then it has
+ // already been moved to the lost and found and is now outdated.
+ results->numOutdatedMissingIndexEntry++;
+ }
+ }
+ return numInserted;
+}
+
+} // namespace index_repair
+} // namespace mongo
diff --git a/src/mongo/db/catalog/index_repair.h b/src/mongo/db/catalog/index_repair.h
new file mode 100644
index 00000000000..a4bee38c011
--- /dev/null
+++ b/src/mongo/db/catalog/index_repair.h
@@ -0,0 +1,63 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/base/status_with.h"
+#include "mongo/db/catalog/validate_state.h"
+#include "mongo/db/operation_context.h"
+
+namespace mongo {
+namespace index_repair {
+
+/**
+ * Deletes the record containing a duplicate key and inserts it into a local lost and found
+ * collection titled "local.lost_and_found.<original collection UUID>". Returns the size of the
+ * record removed.
+ */
+StatusWith<int> moveRecordToLostAndFound(OperationContext* opCtx,
+ const NamespaceString& ns,
+ const NamespaceString& lostAndFoundNss,
+ RecordId dupRecord);
+
+/**
+ * If repair mode is enabled, tries inserting the missingIndexEntry into the indexes. If the
+ * missingIndexEntry is a duplicate on a unique index, removes the duplicate document and keeps it
+ * in a local lost and found collection.
+ */
+int repairMissingIndexEntry(OperationContext* opCtx,
+ std::shared_ptr<const IndexCatalogEntry>& index,
+ const KeyString::Value& ks,
+ const KeyFormat& keyFormat,
+ const NamespaceString& nss,
+ const CollectionPtr& coll,
+ ValidateResults* results);
+
+} // namespace index_repair
+} // namespace mongo
diff --git a/src/mongo/db/catalog/validate_adaptor.cpp b/src/mongo/db/catalog/validate_adaptor.cpp
index 46b0439b29d..7d1d66ad3c5 100644
--- a/src/mongo/db/catalog/validate_adaptor.cpp
+++ b/src/mongo/db/catalog/validate_adaptor.cpp
@@ -639,6 +639,10 @@ void ValidateAdaptor::validateIndexKeyCount(OperationContext* opCtx,
IndexInfo* indexInfo = &_indexConsistency->getIndexInfo(indexName);
auto numTotalKeys = indexInfo->numKeys;
+ // Update numRecords by subtracting the number of records removed from the record store in
+ // repair mode when validating index consistency.
+ _numRecords -= results.keysRemovedFromRecordStore;
+
// Do not fail on finding too few index entries compared to collection entries when full:false.
bool hasTooFewKeys = false;
bool noErrorOnTooFewKeys = !_validateState->isFullIndexValidation();
diff --git a/src/mongo/db/catalog/validate_results.cpp b/src/mongo/db/catalog/validate_results.cpp
index ac10247c06f..32b66f2bf92 100644
--- a/src/mongo/db/catalog/validate_results.cpp
+++ b/src/mongo/db/catalog/validate_results.cpp
@@ -69,6 +69,8 @@ void ValidateResults::appendToResultObj(BSONObjBuilder* resultObj, bool debuggin
resultObj->appendNumber("numRemovedCorruptRecords", numRemovedCorruptRecords);
resultObj->appendNumber("numRemovedExtraIndexEntries", numRemovedExtraIndexEntries);
resultObj->appendNumber("numInsertedMissingIndexEntries", numInsertedMissingIndexEntries);
+ resultObj->appendNumber("numDocumentsMovedToLostAndFound", numDocumentsMovedToLostAndFound);
+ resultObj->appendNumber("numOutdatedMissingIndexEntry", numOutdatedMissingIndexEntry);
}
}
} // namespace mongo
diff --git a/src/mongo/db/catalog/validate_results.h b/src/mongo/db/catalog/validate_results.h
index 9e0062d8fb3..baa6d78ad42 100644
--- a/src/mongo/db/catalog/validate_results.h
+++ b/src/mongo/db/catalog/validate_results.h
@@ -45,6 +45,7 @@ struct IndexValidateResults {
std::vector<std::string> warnings;
int64_t keysTraversed = 0;
int64_t keysTraversedFromFullValidate = 0;
+ int64_t keysRemovedFromRecordStore = 0;
};
using ValidateResultsMap = std::map<std::string, IndexValidateResults>;
@@ -62,6 +63,8 @@ struct ValidateResults {
long long numRemovedCorruptRecords = 0;
long long numRemovedExtraIndexEntries = 0;
long long numInsertedMissingIndexEntries = 0;
+ long long numDocumentsMovedToLostAndFound = 0;
+ long long numOutdatedMissingIndexEntry = 0;
// Maps index names to index-specific validation results.
ValidateResultsMap indexResultsMap;