diff options
author | Louis Williams <louis.williams@mongodb.com> | 2018-08-01 18:19:36 -0400 |
---|---|---|
committer | Louis Williams <louis.williams@mongodb.com> | 2018-08-17 16:12:55 -0400 |
commit | 17686781044525b9c3fbdf06ca326c8f4fb383ba (patch) | |
tree | 9038e76700d4f3aab773c3c0c8beb986a48cd339 /src/mongo/db/repair_database_and_check_version.cpp | |
parent | 59d4d78a68ef6347120c6dfcd6da6ec3d325722f (diff) | |
download | mongo-17686781044525b9c3fbdf06ca326c8f4fb383ba.tar.gz |
SERVER-35731 Prevent a repaired node from re-joining a replica set
Diffstat (limited to 'src/mongo/db/repair_database_and_check_version.cpp')
-rw-r--r-- | src/mongo/db/repair_database_and_check_version.cpp | 72 |
1 file changed, 47 insertions, 25 deletions
diff --git a/src/mongo/db/repair_database_and_check_version.cpp b/src/mongo/db/repair_database_and_check_version.cpp index fb40137c9e2..672556ddedf 100644 --- a/src/mongo/db/repair_database_and_check_version.cpp +++ b/src/mongo/db/repair_database_and_check_version.cpp @@ -50,7 +50,9 @@ #include "mongo/db/repl/drop_pending_collection_reaper.h" #include "mongo/db/repl/replication_coordinator.h" #include "mongo/db/server_options.h" +#include "mongo/db/storage/storage_repair_observer.h" #include "mongo/util/exit.h" +#include "mongo/util/fail_point.h" #include "mongo/util/log.h" #include "mongo/util/quick_exit.h" #include "mongo/util/version.h" @@ -64,6 +66,11 @@ namespace mongo { using logger::LogComponent; using std::endl; +// Exit after repair has started, but before data is repaired. +MONGO_FAIL_POINT_DEFINE(exitBeforeDataRepair); +// Exit after repairing data, but before the replica set configuration is invalidated. +MONGO_FAIL_POINT_DEFINE(exitBeforeRepairInvalidatesConfig); + namespace { const std::string mustDowngradeErrorMsg = str::stream() @@ -180,22 +187,12 @@ const NamespaceString startupLogCollectionName("local.startup_log"); const NamespaceString kSystemReplSetCollection("local.system.replset"); /** - * Checks if this server was started without --replset but has a config in local.system.replset - * (meaning that this is probably a replica set member started in stand-alone mode). - * - * @returns the number of documents in local.system.replset or 0 if this was started with - * --replset. + * Returns 'true' if this server has a configuration document in local.system.replset. 
*/ -unsigned long long checkIfReplMissingFromCommandLine(OperationContext* opCtx) { - // This is helpful for the query below to work as you can't open files when readlocked +bool hasReplSetConfigDoc(OperationContext* opCtx) { Lock::GlobalWrite lk(opCtx); - if (!repl::ReplicationCoordinator::get(getGlobalServiceContext()) - ->getSettings() - .usingReplSets()) { - DBDirectClient c(opCtx); - return c.count(kSystemReplSetCollection.ns()); - } - return 0; + BSONObj config; + return Helpers::getSingleton(opCtx, kSystemReplSetCollection.ns().c_str(), config); } /** @@ -284,10 +281,7 @@ void rebuildIndexes(OperationContext* opCtx, StorageEngine* storageEngine) { * represents whether there are non-local databases. */ StatusWith<bool> repairDatabasesAndCheckVersion(OperationContext* opCtx) { - LOG(1) << "enter repairDatabases (to check pdfile version #)"; - auto const storageEngine = opCtx->getServiceContext()->getStorageEngine(); - Lock::GlobalWrite lk(opCtx); std::vector<std::string> dbNames; @@ -304,9 +298,15 @@ StatusWith<bool> repairDatabasesAndCheckVersion(OperationContext* opCtx) { // Repair all databases first, so that we do not try to open them if they are in bad shape if (storageGlobalParams.repair) { invariant(!storageGlobalParams.readOnly); + + if (MONGO_FAIL_POINT(exitBeforeDataRepair)) { + log() << "Exiting because 'exitBeforeDataRepair' fail point was set."; + quickExit(EXIT_ABRUPT); + } + for (const auto& dbName : dbNames) { LOG(1) << " Repairing database: " << dbName; - fassert(18506, repairDatabase(opCtx, storageEngine, dbName)); + fassertNoTrace(18506, repairDatabase(opCtx, storageEngine, dbName)); } // All collections must have UUIDs before restoring the FCV document to a version that @@ -344,16 +344,38 @@ StatusWith<bool> repairDatabasesAndCheckVersion(OperationContext* opCtx) { } if (!storageGlobalParams.readOnly) { - // We open the "local" database before calling checkIfReplMissingFromCommandLine() to - // ensure the in-memory catalog entries for the 
'kSystemReplSetCollection' collection have - // been populated if the collection exists. If the "local" database didn't exist at this - // point yet, then it will be created. If the mongod is running in a read-only mode, then - // it is fine to not open the "local" database and populate the catalog entries because we - // won't attempt to drop the temporary collections anyway. + // We open the "local" database before calling hasReplSetConfigDoc() to ensure the in-memory + // catalog entries for the 'kSystemReplSetCollection' collection have been populated if the + // collection exists. If the "local" database didn't exist at this point yet, then it will + // be created. If the mongod is running in a read-only mode, then it is fine to not open the + // "local" database and populate the catalog entries because we won't attempt to drop the + // temporary collections anyway. Lock::DBLock dbLock(opCtx, kSystemReplSetCollection.db(), MODE_X); DatabaseHolder::getDatabaseHolder().openDb(opCtx, kSystemReplSetCollection.db()); } + if (storageGlobalParams.repair) { + if (MONGO_FAIL_POINT(exitBeforeRepairInvalidatesConfig)) { + log() << "Exiting because 'exitBeforeRepairInvalidatesConfig' fail point was set."; + quickExit(EXIT_ABRUPT); + } + // This must be done after opening the "local" database as it modifies the replica set + // config. + auto repairObserver = StorageRepairObserver::get(opCtx->getServiceContext()); + repairObserver->onRepairDone(opCtx); + if (repairObserver->isDataModified()) { + warning() << "Modifications made by repair:"; + const auto& mods = repairObserver->getModifications(); + for (const auto& mod : mods) { + warning() << " " << mod; + } + if (hasReplSetConfigDoc(opCtx)) { + warning() << "WARNING: Repair may have modified replicated data. 
This node will no " + "longer be able to join a replica set without a full re-sync"; + } + } + } + const repl::ReplSettings& replSettings = repl::ReplicationCoordinator::get(opCtx)->getSettings(); @@ -362,7 +384,7 @@ StatusWith<bool> repairDatabasesAndCheckVersion(OperationContext* opCtx) { // to. The local DB is special because it is not replicated. See SERVER-10927 for more // details. const bool shouldClearNonLocalTmpCollections = - !(checkIfReplMissingFromCommandLine(opCtx) || replSettings.usingReplSets()); + !(hasReplSetConfigDoc(opCtx) || replSettings.usingReplSets()); // To check whether a featureCompatibilityVersion document exists. bool fcvDocumentExists = false; |