diff options
field | value | date |
---|---|---|
author | Eric Milkie <milkie@mongodb.com> | 2020-01-06 21:03:09 +0000 |
committer | evergreen <evergreen@mongodb.com> | 2020-01-06 21:03:09 +0000 |
commit | 12d44ec1d9a03ed3d0828096bb32de925235c447 (patch) | |
tree | ec1a26abad1e82be3ffb59354f5ef0e450e70fc8 | |
parent | 29c8868b7c1c79ca49341ee68d9e517a5c88f38f (diff) | |
download | mongo-12d44ec1d9a03ed3d0828096bb32de925235c447.tar.gz | |
SERVER-44904 startup recovery should not delete corrupt docs while rebuilding unfinished indexes
mode | file | lines changed |
---|---|---|
-rw-r--r-- | src/mongo/db/catalog/catalog_control.cpp | 2 |
-rw-r--r-- | src/mongo/db/catalog/index_builds_manager.cpp | 11 |
-rw-r--r-- | src/mongo/db/catalog/index_builds_manager.h | 7 |
-rw-r--r-- | src/mongo/db/index_builds_coordinator.cpp | 15 |
-rw-r--r-- | src/mongo/db/index_builds_coordinator.h | 16 |
-rw-r--r-- | src/mongo/db/repair_database.cpp | 9 |
-rw-r--r-- | src/mongo/db/repair_database.h | 6 |
-rw-r--r-- | src/mongo/db/repair_database_and_check_version.cpp | 2 |
8 files changed, 45 insertions, 23 deletions
diff --git a/src/mongo/db/catalog/catalog_control.cpp b/src/mongo/db/catalog/catalog_control.cpp index 351d709b5d0..3ba439ae0b2 100644 --- a/src/mongo/db/catalog/catalog_control.cpp +++ b/src/mongo/db/catalog/catalog_control.cpp @@ -154,7 +154,7 @@ void openCatalog(OperationContext* opCtx, const MinVisibleTimestampMap& minVisib } std::vector<BSONObj> indexSpecs = entry.second.second; - fassert(40690, rebuildIndexesOnCollection(opCtx, collection, indexSpecs)); + fassert(40690, rebuildIndexesOnCollection(opCtx, collection, indexSpecs, RepairData::kNo)); } // Once all unfinished index builds have been dropped and the catalog has been reloaded, restart diff --git a/src/mongo/db/catalog/index_builds_manager.cpp b/src/mongo/db/catalog/index_builds_manager.cpp index bbb3cf32c0f..cedd17fa84a 100644 --- a/src/mongo/db/catalog/index_builds_manager.cpp +++ b/src/mongo/db/catalog/index_builds_manager.cpp @@ -132,14 +132,14 @@ Status IndexBuildsManager::startBuildingIndex(OperationContext* opCtx, } StatusWith<std::pair<long long, long long>> IndexBuildsManager::startBuildingIndexForRecovery( - OperationContext* opCtx, NamespaceString ns, const UUID& buildUUID) { + OperationContext* opCtx, NamespaceString ns, const UUID& buildUUID, RepairData repair) { auto builder = _getBuilder(buildUUID); auto coll = CollectionCatalog::get(opCtx).lookupCollectionByNamespace(opCtx, ns); auto rs = coll ? coll->getRecordStore() : nullptr; - // Iterate all records in the collection. Delete them if they aren't valid BSON. Index them - // if they are. + // Iterate all records in the collection. Validate the records and index them + // if they are valid. Delete them (if in repair mode), or crash, if they are not valid. long long numRecords = 0; long long dataSize = 0; @@ -162,6 +162,11 @@ StatusWith<std::pair<long long, long long>> IndexBuildsManager::startBuildingInd // database even if decimal is disabled. 
auto validStatus = validateBSON(data.data(), data.size(), BSONVersion::kLatest); if (!validStatus.isOK()) { + if (repair == RepairData::kNo) { + severe() << "Invalid BSON detected at " << id << ": " + << redact(validStatus); + fassertFailed(31396); + } warning() << "Invalid BSON detected at " << id << ": " << redact(validStatus) << ". Deleting."; rs->deleteRecord(opCtx, id); diff --git a/src/mongo/db/catalog/index_builds_manager.h b/src/mongo/db/catalog/index_builds_manager.h index 7cbc05a982c..03153c8a2c2 100644 --- a/src/mongo/db/catalog/index_builds_manager.h +++ b/src/mongo/db/catalog/index_builds_manager.h @@ -36,6 +36,7 @@ #include "mongo/db/catalog/multi_index_block.h" #include "mongo/db/namespace_string.h" +#include "mongo/db/repair_database.h" #include "mongo/db/repl_index_build_state.h" #include "mongo/platform/mutex.h" @@ -107,13 +108,13 @@ public: const UUID& buildUUID); /** - * Iterates through every record in the collection to index it while also removing documents - * that are not valid BSON objects. + * Iterates through every record in the collection to index it. May also remove documents + * that are not valid BSON objects, if repair is set to kYes. * * Returns the number of records and the size of the data iterated over. 
*/ StatusWith<std::pair<long long, long long>> startBuildingIndexForRecovery( - OperationContext* opCtx, NamespaceString ns, const UUID& buildUUID); + OperationContext* opCtx, NamespaceString ns, const UUID& buildUUID, RepairData repair); /** * Document inserts observed during the scanning/insertion phase of an index build are not diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp index 5e52d4403c9..868116f0b06 100644 --- a/src/mongo/db/index_builds_coordinator.cpp +++ b/src/mongo/db/index_builds_coordinator.cpp @@ -306,7 +306,8 @@ StatusWith<std::pair<long long, long long>> IndexBuildsCoordinator::rebuildIndex OperationContext* opCtx, const NamespaceString& nss, const std::vector<BSONObj>& specs, - const UUID& buildUUID) { + const UUID& buildUUID, + RepairData repair) { const auto protocol = IndexBuildProtocol::kSinglePhase; auto status = _startIndexBuildForRecovery(opCtx, nss, specs, buildUUID, protocol); @@ -318,7 +319,7 @@ StatusWith<std::pair<long long, long long>> IndexBuildsCoordinator::rebuildIndex Collection* collection = collectionCatalog.lookupCollectionByNamespace(opCtx, nss); // Complete the index build. 
- return _runIndexRebuildForRecovery(opCtx, collection, buildUUID); + return _runIndexRebuildForRecovery(opCtx, collection, buildUUID, repair); } Status IndexBuildsCoordinator::_startIndexBuildForRecovery(OperationContext* opCtx, @@ -1756,7 +1757,10 @@ void IndexBuildsCoordinator::_insertKeysFromSideTablesAndCommit( } StatusWith<std::pair<long long, long long>> IndexBuildsCoordinator::_runIndexRebuildForRecovery( - OperationContext* opCtx, Collection* collection, const UUID& buildUUID) noexcept { + OperationContext* opCtx, + Collection* collection, + const UUID& buildUUID, + RepairData repair) noexcept { invariant(opCtx->lockState()->isCollectionLockedForMode(collection->ns(), MODE_X)); auto replState = invariant(_getIndexBuild(buildUUID)); @@ -1777,8 +1781,9 @@ StatusWith<std::pair<long long, long long>> IndexBuildsCoordinator::_runIndexReb try { log() << "Index builds manager starting: " << buildUUID << ": " << nss; - std::tie(numRecords, dataSize) = uassertStatusOK( - _indexBuildsManager.startBuildingIndexForRecovery(opCtx, collection->ns(), buildUUID)); + std::tie(numRecords, dataSize) = + uassertStatusOK(_indexBuildsManager.startBuildingIndexForRecovery( + opCtx, collection->ns(), buildUUID, repair)); uassertStatusOK( _indexBuildsManager.checkIndexConstraintViolations(opCtx, replState->buildUUID)); diff --git a/src/mongo/db/index_builds_coordinator.h b/src/mongo/db/index_builds_coordinator.h index 001d699cb08..292bdff3eb3 100644 --- a/src/mongo/db/index_builds_coordinator.h +++ b/src/mongo/db/index_builds_coordinator.h @@ -42,6 +42,7 @@ #include "mongo/db/concurrency/d_concurrency.h" #include "mongo/db/database_index_builds_tracker.h" #include "mongo/db/namespace_string.h" +#include "mongo/db/repair_database.h" #include "mongo/db/repl_index_build_state.h" #include "mongo/db/storage/durable_catalog.h" #include "mongo/platform/mutex.h" @@ -145,8 +146,9 @@ public: /** * Runs the full index rebuild for recovery. This will only rebuild single-phase index builds. 
- * Rebuilding an index in recovery mode verifies each document to ensure that it is a valid - * BSON object. It will remove any documents with invalid BSON. + * Rebuilding an index in recovery mode verifies the BSON format of each document. Upon + * discovery of corruption, if 'repair' is kYes, this function will remove any documents with + * invalid BSON; otherwise, it will abort the server process. * * Returns the number of records and the size of the data iterated over, if successful. */ @@ -154,7 +156,8 @@ public: OperationContext* opCtx, const NamespaceString& nss, const std::vector<BSONObj>& specs, - const UUID& buildUUID); + const UUID& buildUUID, + RepairData repair); /** * Signals the index build identified by 'buildUUID' to commit, and waits for its thread to @@ -588,12 +591,15 @@ protected: /** * Runs the index build. * Rebuilding an index in recovery mode verifies each document to ensure that it is a valid - * BSON object. It will remove any documents with invalid BSON. + * BSON object. If repair is kYes, it will remove any documents with invalid BSON. * * Returns the number of records and the size of the data iterated over, if successful. */ StatusWith<std::pair<long long, long long>> _runIndexRebuildForRecovery( - OperationContext* opCtx, Collection* collection, const UUID& buildUUID) noexcept; + OperationContext* opCtx, + Collection* collection, + const UUID& buildUUID, + RepairData repair) noexcept; /** * Looks up active index build by UUID. 
diff --git a/src/mongo/db/repair_database.cpp b/src/mongo/db/repair_database.cpp index 533e0df0c6d..47283e8719b 100644 --- a/src/mongo/db/repair_database.cpp +++ b/src/mongo/db/repair_database.cpp @@ -109,7 +109,8 @@ StatusWith<IndexNameObjs> getIndexNameObjs(OperationContext* opCtx, Status rebuildIndexesOnCollection(OperationContext* opCtx, Collection* collection, - const std::vector<BSONObj>& indexSpecs) { + const std::vector<BSONObj>& indexSpecs, + RepairData repair) { // Skip the rest if there are no indexes to rebuild. if (indexSpecs.empty()) return Status::OK(); @@ -117,8 +118,8 @@ Status rebuildIndexesOnCollection(OperationContext* opCtx, // Rebuild the indexes provided by 'indexSpecs'. IndexBuildsCoordinator* indexBuildsCoord = IndexBuildsCoordinator::get(opCtx); UUID buildUUID = UUID::gen(); - auto swRebuild = - indexBuildsCoord->rebuildIndexesForRecovery(opCtx, collection->ns(), indexSpecs, buildUUID); + auto swRebuild = indexBuildsCoord->rebuildIndexesForRecovery( + opCtx, collection->ns(), indexSpecs, buildUUID, repair); if (!swRebuild.isOK()) { return swRebuild.getStatus(); } @@ -161,7 +162,7 @@ Status repairCollections(OperationContext* opCtx, return swIndexNameObjs.getStatus(); std::vector<BSONObj> indexSpecs = swIndexNameObjs.getValue().second; - Status status = rebuildIndexesOnCollection(opCtx, collection, indexSpecs); + Status status = rebuildIndexesOnCollection(opCtx, collection, indexSpecs, RepairData::kYes); if (!status.isOK()) return status; diff --git a/src/mongo/db/repair_database.h b/src/mongo/db/repair_database.h index 6989515b7c6..4f3ae143bdd 100644 --- a/src/mongo/db/repair_database.h +++ b/src/mongo/db/repair_database.h @@ -60,10 +60,14 @@ StatusWith<IndexNameObjs> getIndexNameObjs(OperationContext* opCtx, /** * Rebuilds the indexes provided by the 'indexSpecs' on the given collection. * One example usage is when a 'dropIndex' command is rolled back. The dropped index must be remade. 
+ * When 'repair' is set to kYes, this function will delete corrupt records when found, rather than + * crashing. */ +enum class RepairData { kYes, kNo }; Status rebuildIndexesOnCollection(OperationContext* opCtx, Collection* collection, - const std::vector<BSONObj>& indexSpecs); + const std::vector<BSONObj>& indexSpecs, + RepairData repair); /** * Repairs a database using a storage engine-specific, best-effort process. diff --git a/src/mongo/db/repair_database_and_check_version.cpp b/src/mongo/db/repair_database_and_check_version.cpp index 741976143eb..699b2d2fcc5 100644 --- a/src/mongo/db/repair_database_and_check_version.cpp +++ b/src/mongo/db/repair_database_and_check_version.cpp @@ -312,7 +312,7 @@ void rebuildIndexes(OperationContext* opCtx, StorageEngine* storageEngine) { } std::vector<BSONObj> indexSpecs = entry.second.second; - fassert(40592, rebuildIndexesOnCollection(opCtx, collection, indexSpecs)); + fassert(40592, rebuildIndexesOnCollection(opCtx, collection, indexSpecs, RepairData::kNo)); } // Once all unfinished indexes have been rebuilt, restart any unfinished index builds. This will |