summary refs log tree commit diff
diff options
context:
space:
mode:
author Eric Milkie <milkie@mongodb.com> 2020-01-06 21:03:09 +0000
committer evergreen <evergreen@mongodb.com> 2020-01-06 21:03:09 +0000
commit 12d44ec1d9a03ed3d0828096bb32de925235c447 (patch)
tree ec1a26abad1e82be3ffb59354f5ef0e450e70fc8
parent 29c8868b7c1c79ca49341ee68d9e517a5c88f38f (diff)
download mongo-12d44ec1d9a03ed3d0828096bb32de925235c447.tar.gz
SERVER-44904 startup recovery should not delete corrupt docs while rebuilding unfinished indexes
-rw-r--r-- src/mongo/db/catalog/catalog_control.cpp 2
-rw-r--r-- src/mongo/db/catalog/index_builds_manager.cpp 11
-rw-r--r-- src/mongo/db/catalog/index_builds_manager.h 7
-rw-r--r-- src/mongo/db/index_builds_coordinator.cpp 15
-rw-r--r-- src/mongo/db/index_builds_coordinator.h 16
-rw-r--r-- src/mongo/db/repair_database.cpp 9
-rw-r--r-- src/mongo/db/repair_database.h 6
-rw-r--r-- src/mongo/db/repair_database_and_check_version.cpp 2
8 files changed, 45 insertions, 23 deletions
diff --git a/src/mongo/db/catalog/catalog_control.cpp b/src/mongo/db/catalog/catalog_control.cpp
index 351d709b5d0..3ba439ae0b2 100644
--- a/src/mongo/db/catalog/catalog_control.cpp
+++ b/src/mongo/db/catalog/catalog_control.cpp
@@ -154,7 +154,7 @@ void openCatalog(OperationContext* opCtx, const MinVisibleTimestampMap& minVisib
}
std::vector<BSONObj> indexSpecs = entry.second.second;
- fassert(40690, rebuildIndexesOnCollection(opCtx, collection, indexSpecs));
+ fassert(40690, rebuildIndexesOnCollection(opCtx, collection, indexSpecs, RepairData::kNo));
}
// Once all unfinished index builds have been dropped and the catalog has been reloaded, restart
diff --git a/src/mongo/db/catalog/index_builds_manager.cpp b/src/mongo/db/catalog/index_builds_manager.cpp
index bbb3cf32c0f..cedd17fa84a 100644
--- a/src/mongo/db/catalog/index_builds_manager.cpp
+++ b/src/mongo/db/catalog/index_builds_manager.cpp
@@ -132,14 +132,14 @@ Status IndexBuildsManager::startBuildingIndex(OperationContext* opCtx,
}
StatusWith<std::pair<long long, long long>> IndexBuildsManager::startBuildingIndexForRecovery(
- OperationContext* opCtx, NamespaceString ns, const UUID& buildUUID) {
+ OperationContext* opCtx, NamespaceString ns, const UUID& buildUUID, RepairData repair) {
auto builder = _getBuilder(buildUUID);
auto coll = CollectionCatalog::get(opCtx).lookupCollectionByNamespace(opCtx, ns);
auto rs = coll ? coll->getRecordStore() : nullptr;
- // Iterate all records in the collection. Delete them if they aren't valid BSON. Index them
- // if they are.
+ // Iterate all records in the collection. Validate the records and index them
+ // if they are valid. Delete them (if in repair mode), or crash, if they are not valid.
long long numRecords = 0;
long long dataSize = 0;
@@ -162,6 +162,11 @@ StatusWith<std::pair<long long, long long>> IndexBuildsManager::startBuildingInd
// database even if decimal is disabled.
auto validStatus = validateBSON(data.data(), data.size(), BSONVersion::kLatest);
if (!validStatus.isOK()) {
+ if (repair == RepairData::kNo) {
+ severe() << "Invalid BSON detected at " << id << ": "
+ << redact(validStatus);
+ fassertFailed(31396);
+ }
warning() << "Invalid BSON detected at " << id << ": " << redact(validStatus)
<< ". Deleting.";
rs->deleteRecord(opCtx, id);
diff --git a/src/mongo/db/catalog/index_builds_manager.h b/src/mongo/db/catalog/index_builds_manager.h
index 7cbc05a982c..03153c8a2c2 100644
--- a/src/mongo/db/catalog/index_builds_manager.h
+++ b/src/mongo/db/catalog/index_builds_manager.h
@@ -36,6 +36,7 @@
#include "mongo/db/catalog/multi_index_block.h"
#include "mongo/db/namespace_string.h"
+#include "mongo/db/repair_database.h"
#include "mongo/db/repl_index_build_state.h"
#include "mongo/platform/mutex.h"
@@ -107,13 +108,13 @@ public:
const UUID& buildUUID);
/**
- * Iterates through every record in the collection to index it while also removing documents
- * that are not valid BSON objects.
+ * Iterates through every record in the collection to index it. May also remove documents
+ * that are not valid BSON objects, if repair is set to kYes.
*
* Returns the number of records and the size of the data iterated over.
*/
StatusWith<std::pair<long long, long long>> startBuildingIndexForRecovery(
- OperationContext* opCtx, NamespaceString ns, const UUID& buildUUID);
+ OperationContext* opCtx, NamespaceString ns, const UUID& buildUUID, RepairData repair);
/**
* Document inserts observed during the scanning/insertion phase of an index build are not
diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp
index 5e52d4403c9..868116f0b06 100644
--- a/src/mongo/db/index_builds_coordinator.cpp
+++ b/src/mongo/db/index_builds_coordinator.cpp
@@ -306,7 +306,8 @@ StatusWith<std::pair<long long, long long>> IndexBuildsCoordinator::rebuildIndex
OperationContext* opCtx,
const NamespaceString& nss,
const std::vector<BSONObj>& specs,
- const UUID& buildUUID) {
+ const UUID& buildUUID,
+ RepairData repair) {
const auto protocol = IndexBuildProtocol::kSinglePhase;
auto status = _startIndexBuildForRecovery(opCtx, nss, specs, buildUUID, protocol);
@@ -318,7 +319,7 @@ StatusWith<std::pair<long long, long long>> IndexBuildsCoordinator::rebuildIndex
Collection* collection = collectionCatalog.lookupCollectionByNamespace(opCtx, nss);
// Complete the index build.
- return _runIndexRebuildForRecovery(opCtx, collection, buildUUID);
+ return _runIndexRebuildForRecovery(opCtx, collection, buildUUID, repair);
}
Status IndexBuildsCoordinator::_startIndexBuildForRecovery(OperationContext* opCtx,
@@ -1756,7 +1757,10 @@ void IndexBuildsCoordinator::_insertKeysFromSideTablesAndCommit(
}
StatusWith<std::pair<long long, long long>> IndexBuildsCoordinator::_runIndexRebuildForRecovery(
- OperationContext* opCtx, Collection* collection, const UUID& buildUUID) noexcept {
+ OperationContext* opCtx,
+ Collection* collection,
+ const UUID& buildUUID,
+ RepairData repair) noexcept {
invariant(opCtx->lockState()->isCollectionLockedForMode(collection->ns(), MODE_X));
auto replState = invariant(_getIndexBuild(buildUUID));
@@ -1777,8 +1781,9 @@ StatusWith<std::pair<long long, long long>> IndexBuildsCoordinator::_runIndexReb
try {
log() << "Index builds manager starting: " << buildUUID << ": " << nss;
- std::tie(numRecords, dataSize) = uassertStatusOK(
- _indexBuildsManager.startBuildingIndexForRecovery(opCtx, collection->ns(), buildUUID));
+ std::tie(numRecords, dataSize) =
+ uassertStatusOK(_indexBuildsManager.startBuildingIndexForRecovery(
+ opCtx, collection->ns(), buildUUID, repair));
uassertStatusOK(
_indexBuildsManager.checkIndexConstraintViolations(opCtx, replState->buildUUID));
diff --git a/src/mongo/db/index_builds_coordinator.h b/src/mongo/db/index_builds_coordinator.h
index 001d699cb08..292bdff3eb3 100644
--- a/src/mongo/db/index_builds_coordinator.h
+++ b/src/mongo/db/index_builds_coordinator.h
@@ -42,6 +42,7 @@
#include "mongo/db/concurrency/d_concurrency.h"
#include "mongo/db/database_index_builds_tracker.h"
#include "mongo/db/namespace_string.h"
+#include "mongo/db/repair_database.h"
#include "mongo/db/repl_index_build_state.h"
#include "mongo/db/storage/durable_catalog.h"
#include "mongo/platform/mutex.h"
@@ -145,8 +146,9 @@ public:
/**
* Runs the full index rebuild for recovery. This will only rebuild single-phase index builds.
- * Rebuilding an index in recovery mode verifies each document to ensure that it is a valid
- * BSON object. It will remove any documents with invalid BSON.
+ * Rebuilding an index in recovery mode verifies the BSON format of each document. Upon
+ * discovery of corruption, if 'repair' is kYes, this function will remove any documents with
+ * invalid BSON; otherwise, it will abort the server process.
*
* Returns the number of records and the size of the data iterated over, if successful.
*/
@@ -154,7 +156,8 @@ public:
OperationContext* opCtx,
const NamespaceString& nss,
const std::vector<BSONObj>& specs,
- const UUID& buildUUID);
+ const UUID& buildUUID,
+ RepairData repair);
/**
* Signals the index build identified by 'buildUUID' to commit, and waits for its thread to
@@ -588,12 +591,15 @@ protected:
/**
* Runs the index build.
* Rebuilding an index in recovery mode verifies each document to ensure that it is a valid
- * BSON object. It will remove any documents with invalid BSON.
+ * BSON object. If repair is kYes, it will remove any documents with invalid BSON.
*
* Returns the number of records and the size of the data iterated over, if successful.
*/
StatusWith<std::pair<long long, long long>> _runIndexRebuildForRecovery(
- OperationContext* opCtx, Collection* collection, const UUID& buildUUID) noexcept;
+ OperationContext* opCtx,
+ Collection* collection,
+ const UUID& buildUUID,
+ RepairData repair) noexcept;
/**
* Looks up active index build by UUID.
diff --git a/src/mongo/db/repair_database.cpp b/src/mongo/db/repair_database.cpp
index 533e0df0c6d..47283e8719b 100644
--- a/src/mongo/db/repair_database.cpp
+++ b/src/mongo/db/repair_database.cpp
@@ -109,7 +109,8 @@ StatusWith<IndexNameObjs> getIndexNameObjs(OperationContext* opCtx,
Status rebuildIndexesOnCollection(OperationContext* opCtx,
Collection* collection,
- const std::vector<BSONObj>& indexSpecs) {
+ const std::vector<BSONObj>& indexSpecs,
+ RepairData repair) {
// Skip the rest if there are no indexes to rebuild.
if (indexSpecs.empty())
return Status::OK();
@@ -117,8 +118,8 @@ Status rebuildIndexesOnCollection(OperationContext* opCtx,
// Rebuild the indexes provided by 'indexSpecs'.
IndexBuildsCoordinator* indexBuildsCoord = IndexBuildsCoordinator::get(opCtx);
UUID buildUUID = UUID::gen();
- auto swRebuild =
- indexBuildsCoord->rebuildIndexesForRecovery(opCtx, collection->ns(), indexSpecs, buildUUID);
+ auto swRebuild = indexBuildsCoord->rebuildIndexesForRecovery(
+ opCtx, collection->ns(), indexSpecs, buildUUID, repair);
if (!swRebuild.isOK()) {
return swRebuild.getStatus();
}
@@ -161,7 +162,7 @@ Status repairCollections(OperationContext* opCtx,
return swIndexNameObjs.getStatus();
std::vector<BSONObj> indexSpecs = swIndexNameObjs.getValue().second;
- Status status = rebuildIndexesOnCollection(opCtx, collection, indexSpecs);
+ Status status = rebuildIndexesOnCollection(opCtx, collection, indexSpecs, RepairData::kYes);
if (!status.isOK())
return status;
diff --git a/src/mongo/db/repair_database.h b/src/mongo/db/repair_database.h
index 6989515b7c6..4f3ae143bdd 100644
--- a/src/mongo/db/repair_database.h
+++ b/src/mongo/db/repair_database.h
@@ -60,10 +60,14 @@ StatusWith<IndexNameObjs> getIndexNameObjs(OperationContext* opCtx,
/**
* Rebuilds the indexes provided by the 'indexSpecs' on the given collection.
* One example usage is when a 'dropIndex' command is rolled back. The dropped index must be remade.
+ * When 'repair' is set to kYes, this function will delete corrupt records when found, rather than
+ * crashing.
*/
+enum class RepairData { kYes, kNo };
Status rebuildIndexesOnCollection(OperationContext* opCtx,
Collection* collection,
- const std::vector<BSONObj>& indexSpecs);
+ const std::vector<BSONObj>& indexSpecs,
+ RepairData repair);
/**
* Repairs a database using a storage engine-specific, best-effort process.
diff --git a/src/mongo/db/repair_database_and_check_version.cpp b/src/mongo/db/repair_database_and_check_version.cpp
index 741976143eb..699b2d2fcc5 100644
--- a/src/mongo/db/repair_database_and_check_version.cpp
+++ b/src/mongo/db/repair_database_and_check_version.cpp
@@ -312,7 +312,7 @@ void rebuildIndexes(OperationContext* opCtx, StorageEngine* storageEngine) {
}
std::vector<BSONObj> indexSpecs = entry.second.second;
- fassert(40592, rebuildIndexesOnCollection(opCtx, collection, indexSpecs));
+ fassert(40592, rebuildIndexesOnCollection(opCtx, collection, indexSpecs, RepairData::kNo));
}
// Once all unfinished indexes have been rebuilt, restart any unfinished index builds. This will