diff options
author | Judah Schvimer <judah@mongodb.com> | 2017-10-09 11:15:27 -0400 |
---|---|---|
committer | Judah Schvimer <judah@mongodb.com> | 2017-10-09 11:15:27 -0400 |
commit | d7a30a716243db13644a16618a939df6bc1344fc (patch) | |
tree | 214e76dbe81dc940bf5e2392cbf2938f73a026c4 /src | |
parent | a6db2621e6376df07f368ab4c4028843015dfa51 (diff) | |
download | mongo-d7a30a716243db13644a16618a939df6bc1344fc.tar.gz |
SERVER-31019 fail initial sync if fCV changes during oplog application
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/catalog/database_impl.cpp | 6 | ||||
-rw-r--r-- | src/mongo/db/repl/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/repl/collection_cloner.cpp | 10 | ||||
-rw-r--r-- | src/mongo/db/repl/database_cloner.cpp | 24 | ||||
-rw-r--r-- | src/mongo/db/repl/database_cloner.h | 5 | ||||
-rw-r--r-- | src/mongo/db/repl/initial_syncer.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/repl/oplog.cpp | 31 | ||||
-rw-r--r-- | src/mongo/db/repl/sync_tail_test.cpp | 77 | ||||
-rw-r--r-- | src/mongo/db/server_options.h | 5 |
9 files changed, 158 insertions, 6 deletions
diff --git a/src/mongo/db/catalog/database_impl.cpp b/src/mongo/db/catalog/database_impl.cpp index 161119ef16c..87630ec52b1 100644 --- a/src/mongo/db/catalog/database_impl.cpp +++ b/src/mongo/db/catalog/database_impl.cpp @@ -771,8 +771,10 @@ Collection* DatabaseImpl::createCollection(OperationContext* opCtx, coordinator->canAcceptWritesForDatabase(opCtx, nss.db()) || nss.isSystemDotProfile()); // system.profile is special as it's not replicated if (!okayCreation) { - severe() << "Attempt to assign UUID to replicated collection: " << nss.ns(); - fassertFailed(40643); + std::string msg = str::stream() << "Attempt to assign UUID to replicated collection: " + << nss.ns(); + severe() << msg; + uasserted(ErrorCodes::InvalidOptions, msg); } optionsWithUUID.uuid.emplace(CollectionUUID::gen()); } diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript index 0b1cf62ad90..08592d21e55 100644 --- a/src/mongo/db/repl/SConscript +++ b/src/mongo/db/repl/SConscript @@ -14,6 +14,7 @@ env.Library( '$BUILD_DIR/mongo/base', '$BUILD_DIR/mongo/db/background', '$BUILD_DIR/mongo/db/catalog/catalog_helpers', + '$BUILD_DIR/mongo/db/commands/dcommands_fcv', '$BUILD_DIR/mongo/db/db_raii', '$BUILD_DIR/mongo/db/dbdirectclient', '$BUILD_DIR/mongo/db/dbhelpers', diff --git a/src/mongo/db/repl/collection_cloner.cpp b/src/mongo/db/repl/collection_cloner.cpp index 036f0b94af0..289941e94e8 100644 --- a/src/mongo/db/repl/collection_cloner.cpp +++ b/src/mongo/db/repl/collection_cloner.cpp @@ -144,9 +144,13 @@ CollectionCloner::CollectionCloner(executor::TaskExecutor* executor, _documentsToInsert(), _dbWorkTaskRunner(_dbWorkThreadPool), _scheduleDbWorkFn([this](const executor::TaskExecutor::CallbackFn& work) { - auto task = [work](OperationContext* opCtx, - const Status& status) -> TaskRunner::NextAction { - work(executor::TaskExecutor::CallbackArgs(nullptr, {}, status, opCtx)); + auto task = [ this, work ](OperationContext * opCtx, + const Status& status) noexcept->TaskRunner::NextAction { + try { + work(executor::TaskExecutor::CallbackArgs(nullptr, {}, status, opCtx)); + } catch (...) { + _finishCallback(exceptionToStatus()); + } return TaskRunner::NextAction::kDisposeOperationContext; }; _dbWorkTaskRunner.schedule(task); diff --git a/src/mongo/db/repl/database_cloner.cpp b/src/mongo/db/repl/database_cloner.cpp index fdaecdb5604..2873a205f21 100644 --- a/src/mongo/db/repl/database_cloner.cpp +++ b/src/mongo/db/repl/database_cloner.cpp @@ -51,6 +51,9 @@ namespace mongo { namespace repl { +// Failpoint which causes the initial sync function to hang before running listCollections. +MONGO_FP_DECLARE(initialSyncHangBeforeListCollections); + namespace { using LockGuard = stdx::lock_guard<stdx::mutex>; @@ -164,8 +167,13 @@ bool DatabaseCloner::_isActive_inlock() const { return State::kRunning == _state || State::kShuttingDown == _state; } -Status DatabaseCloner::startup() noexcept { +bool DatabaseCloner::_isShuttingDown() const { LockGuard lk(_mutex); + return State::kShuttingDown == _state; +} + +Status DatabaseCloner::startup() noexcept { + UniqueLock lk(_mutex); switch (_state) { case State::kPreStart: @@ -179,6 +187,20 @@ Status DatabaseCloner::startup() noexcept { return Status(ErrorCodes::ShutdownInProgress, "database cloner completed"); } + MONGO_FAIL_POINT_BLOCK(initialSyncHangBeforeListCollections, customArgs) { + const auto& data = customArgs.getData(); + const auto databaseElem = data["database"]; + if (!databaseElem || databaseElem.checkAndGetStringData() == _dbname) { + lk.unlock(); + log() << "initial sync - initialSyncHangBeforeListCollections fail point " + "enabled. Blocking until fail point is disabled."; + while (MONGO_FAIL_POINT(initialSyncHangBeforeListCollections) && !_isShuttingDown()) { + mongo::sleepsecs(1); + } + lk.lock(); + } + } + _stats.start = _executor->now(); LOG(1) << "Scheduling listCollections call for database: " << _dbname; Status scheduleResult = _listCollectionsFetcher.schedule(); diff --git a/src/mongo/db/repl/database_cloner.h b/src/mongo/db/repl/database_cloner.h index 73006606586..3c5c3a472e8 100644 --- a/src/mongo/db/repl/database_cloner.h +++ b/src/mongo/db/repl/database_cloner.h @@ -178,6 +178,11 @@ private: bool _isActive_inlock() const; /** + * Returns whether the DatabaseCloner is in shutdown. + */ + bool _isShuttingDown() const; + + /** * Read collection names and options from listCollections result. */ void _listCollectionsCallback(const StatusWith<Fetcher::QueryResponse>& fetchResult, diff --git a/src/mongo/db/repl/initial_syncer.cpp b/src/mongo/db/repl/initial_syncer.cpp index 96a03c06414..4660bf29f14 100644 --- a/src/mongo/db/repl/initial_syncer.cpp +++ b/src/mongo/db/repl/initial_syncer.cpp @@ -424,6 +424,11 @@ void InitialSyncer::_startInitialSyncAttemptCallback( _lastApplied = {}; _lastFetched = {}; + LOG(2) << "Resetting feature compatibility version to 3.4. If the sync source is in feature " + "compatibility version 3.6, we will find out when we clone the admin.system.version " + "collection."; + serverGlobalParams.featureCompatibility.reset(); + // Clear the oplog buffer. _oplogBuffer->clear(makeOpCtx().get()); diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp index 110909ffd6c..d0096714b72 100644 --- a/src/mongo/db/repl/oplog.cpp +++ b/src/mongo/db/repl/oplog.cpp @@ -57,6 +57,7 @@ #include "mongo/db/catalog/uuid_catalog.h" #include "mongo/db/client.h" #include "mongo/db/commands.h" +#include "mongo/db/commands/feature_compatibility_version.h" #include "mongo/db/concurrency/write_conflict_exception.h" #include "mongo/db/db_raii.h" #include "mongo/db/dbdirectclient.h" @@ -989,6 +990,19 @@ Status applyOperation_inlock(OperationContext* opCtx, collection = db->getCollection(opCtx, requestNss); } + // During upgrade from 3.4 to 3.6, the feature compatibility version cannot change during + // initial sync because we cannot do some operations with UUIDs and others without. + if (!inSteadyStateReplication && requestNss == FeatureCompatibilityVersion::kCollection) { + std::string oID; + auto status = bsonExtractStringField(o, "_id", &oID); + if (status.isOK() && oID == FeatureCompatibilityVersion::kParameterName) { + return Status(ErrorCodes::OplogOperationUnsupported, + str::stream() << "Applying operation on feature compatibility version " + "document not supported in initial sync: " + << redact(op)); + } + } + BSONObj o2; if (fieldO2.isABSONObj()) o2 = fieldO2.Obj(); @@ -1388,6 +1402,23 @@ Status applyCommand_inlock(OperationContext* opCtx, << redact(op); } + // During upgrade from 3.4 to 3.6, the feature compatibility version cannot change during + // initial sync because we cannot do some operations with UUIDs and others without. + // We do not attempt to parse the whitelisted ops because they do not have a collection + // namespace. If we drop the 'admin' database we will also log a 'drop' oplog entry for each + // collection dropped. 'applyOps' will try to apply each individual operation, and those + // will be caught then if they are a problem. + auto whitelistedOps = std::vector<std::string>{"dropDatabase", "applyOps", "dbCheck"}; + if (!inSteadyStateReplication && + (std::find(whitelistedOps.begin(), whitelistedOps.end(), o.firstElementFieldName()) == + whitelistedOps.end()) && + parseNs(nss.ns(), o) == FeatureCompatibilityVersion::kCollection) { + return Status(ErrorCodes::OplogOperationUnsupported, + str::stream() << "Applying command to feature compatibility version " + "collection not supported in initial sync: " + << redact(op)); + } + // Applying commands in repl is done under Global W-lock, so it is safe to not // perform the current DB checks after reacquiring the lock. invariant(opCtx->lockState()->isW()); diff --git a/src/mongo/db/repl/sync_tail_test.cpp b/src/mongo/db/repl/sync_tail_test.cpp index 783917ea969..70080b1559e 100644 --- a/src/mongo/db/repl/sync_tail_test.cpp +++ b/src/mongo/db/repl/sync_tail_test.cpp @@ -41,6 +41,7 @@ #include "mongo/db/catalog/database_holder.h" #include "mongo/db/catalog/document_validation.h" #include "mongo/db/client.h" +#include "mongo/db/commands/feature_compatibility_version.h" #include "mongo/db/concurrency/d_concurrency.h" #include "mongo/db/concurrency/write_conflict_exception.h" #include "mongo/db/curop.h" @@ -64,6 +65,7 @@ #include "mongo/db/service_context_d_test_fixture.h" #include "mongo/db/session_catalog.h" #include "mongo/stdx/mutex.h" +#include "mongo/unittest/death_test.h" #include "mongo/unittest/unittest.h" #include "mongo/util/concurrency/old_thread_pool.h" #include "mongo/util/md5.hpp" @@ -595,6 +597,36 @@ TEST_F(SyncTailTest, MultiSyncApplyUsesSyncApplyToApplyOperation) { ASSERT_TRUE(AutoGetCollectionForReadCommand(_opCtx.get(), nss).getCollection()); } +DEATH_TEST_F(SyncTailTest, + MultiSyncApplyFailsWhenCollectionCreationTriesToMakeUUID, + "Attempt to assign UUID to replicated collection") { + NamespaceString nss("foo." + _agent.getSuiteName() + "_" + _agent.getTestName()); + + serverGlobalParams.featureCompatibility.setVersion( + ServerGlobalParams::FeatureCompatibility::Version::k36); + + auto op = makeCreateCollectionOplogEntry({Timestamp(Seconds(1), 0), 1LL}, nss); + _opCtx.reset(); + MultiApplier::OperationPtrs ops = {&op}; + multiSyncApply(&ops, nullptr); +} + +TEST_F(SyncTailTest, MultiInitialSyncApplyFailsWhenCollectionCreationTriesToMakeUUID) { + NamespaceString nss("foo." + _agent.getSuiteName() + "_" + _agent.getTestName()); + + serverGlobalParams.featureCompatibility.setVersion( + ServerGlobalParams::FeatureCompatibility::Version::k36); + + auto op = makeCreateCollectionOplogEntry({Timestamp(Seconds(1), 0), 1LL}, nss); + + _opCtx.reset(); + MultiApplier::OperationPtrs ops = {&op}; + ASSERT_EQUALS(ErrorCodes::InvalidOptions, multiInitialSyncApply(&ops, nullptr, nullptr)); + + serverGlobalParams.featureCompatibility.setVersion( + ServerGlobalParams::FeatureCompatibility::Version::k34); +} + TEST_F(SyncTailTest, MultiSyncApplyDisablesDocumentValidationWhileApplyingOperations) { NamespaceString nss("local." + _agent.getSuiteName() + "_" + _agent.getTestName()); auto syncApply = [](OperationContext* opCtx, const BSONObj&, bool convertUpdatesToUpserts) { @@ -1416,6 +1448,51 @@ TEST_F(IdempotencyTest, CollModIndexNotFound) { testOpsAreIdempotent(ops); } +TEST_F(IdempotencyTest, ResyncOnDropFCVCollection) { + ASSERT_OK( + ReplicationCoordinator::get(_opCtx.get())->setFollowerMode(MemberState::RS_RECOVERING)); + + auto fcvNS = NamespaceString(FeatureCompatibilityVersion::kCollection); + auto cmd = BSON("drop" << fcvNS.coll()); + auto op = makeCommandOplogEntry( + nextOpTime(), NamespaceString(FeatureCompatibilityVersion::kCollection), cmd); + ASSERT_EQUALS(runOp(op), ErrorCodes::OplogOperationUnsupported); +} + +TEST_F(IdempotencyTest, ResyncOnInsertFCVDocument) { + auto fcvNS = NamespaceString(FeatureCompatibilityVersion::kCollection); + ::mongo::repl::createCollection(_opCtx.get(), fcvNS, CollectionOptions()); + ASSERT_OK( + ReplicationCoordinator::get(_opCtx.get())->setFollowerMode(MemberState::RS_RECOVERING)); + + auto op = makeInsertDocumentOplogEntry( + nextOpTime(), fcvNS, BSON("_id" << FeatureCompatibilityVersion::kParameterName)); + ASSERT_EQUALS(runOp(op), ErrorCodes::OplogOperationUnsupported); +} + +TEST_F(IdempotencyTest, InsertToFCVCollectionBesidesFCVDocumentSucceeds) { + auto fcvNS = NamespaceString(FeatureCompatibilityVersion::kCollection); + ::mongo::repl::createCollection(_opCtx.get(), fcvNS, CollectionOptions()); + ASSERT_OK( + ReplicationCoordinator::get(_opCtx.get())->setFollowerMode(MemberState::RS_RECOVERING)); + + auto op = makeInsertDocumentOplogEntry(nextOpTime(), + fcvNS, + BSON("_id" + << "other")); + ASSERT_OK(runOp(op)); +} + +TEST_F(IdempotencyTest, DropDatabaseSucceeds) { + auto ns = NamespaceString("foo.bar"); + ::mongo::repl::createCollection(_opCtx.get(), ns, CollectionOptions()); + ASSERT_OK( + ReplicationCoordinator::get(_opCtx.get())->setFollowerMode(MemberState::RS_RECOVERING)); + + auto op = makeCommandOplogEntry(nextOpTime(), ns, BSON("dropDatabase" << 1)); + ASSERT_OK(runOp(op)); +} + } // namespace } // namespace repl } // namespace mongo diff --git a/src/mongo/db/server_options.h b/src/mongo/db/server_options.h index c3cd1b89b8d..12d8018c180 100644 --- a/src/mongo/db/server_options.h +++ b/src/mongo/db/server_options.h @@ -171,6 +171,11 @@ struct ServerGlobalParams { return _version.load(); } + void reset() { + _version.store(Version::k34); + _targetVersion.store(Version::kUnset); + } + void setVersion(Version version) { return _version.store(version); } |