diff options
author | Benety Goh <benety@mongodb.com> | 2017-06-19 15:18:58 -0400 |
---|---|---|
committer | Benety Goh <benety@mongodb.com> | 2017-06-27 15:10:19 -0400 |
commit | beea8e5d090d269ca0a0390785bd417fcf4cfcf2 (patch) | |
tree | 9872b8c74d53b803a624c7fadc268d617a2ce6a6 | |
parent | 8d43da1a95e449b95c1bd9b6af9d6e9604794fed (diff) | |
download | mongo-beea8e5d090d269ca0a0390785bd417fcf4cfcf2.tar.gz |
SERVER-29277 dropDatabase() waits for collection drops to be replicated before dropping database
-rw-r--r-- | jstests/replsets/drop_databases_two_phase.js | 149 | ||||
-rw-r--r-- | src/mongo/db/catalog/drop_database.cpp | 89 | ||||
-rw-r--r-- | src/mongo/db/catalog/drop_database_test.cpp | 75 |
3 files changed, 304 insertions, 9 deletions
diff --git a/jstests/replsets/drop_databases_two_phase.js b/jstests/replsets/drop_databases_two_phase.js new file mode 100644 index 00000000000..aa4aae61b82 --- /dev/null +++ b/jstests/replsets/drop_databases_two_phase.js @@ -0,0 +1,149 @@ +/** + * Test to ensure that two phase drop behavior for databases on replica sets works properly. + * + * Uses a 3 node replica set with one arbiter to verify both phases of a 2-phase database drop: + * the 'Collections' and 'Database' phase. Executing a 'dropDatabase' command should put that + * database into a drop-pending state. In this state, all new collection creation requests will + * be rejected with an error with the code ErrorCodes.DatabaseDropPending. We will exit the + * 'Collections' phase once the last collection drop has been propagated to a majority. All + * collections in the database will be physically dropped at this point. + * + * During the 'Database' phase, collection creation is still disallowed. This phase removes the + * metadata for the database from the server and appends the 'dropDatabase' operation to the oplog. + * Unlike the 'Collections' phase, we do not wait for the 'dropDatabase' to propagate to a majority + * unless explicitly requested by the user with a write concern. + */ + +(function() { + "use strict"; + + // Returns a list of all collections in a given database. Use 'args' as the + // 'listCollections' command arguments. + function listCollections(database, args) { + var args = args || {}; + var failMsg = "'listCollections' command failed"; + var res = assert.commandWorked(database.runCommand("listCollections", args), failMsg); + return res.cursor.firstBatch; + } + + // Returns a list of 'drop-pending' collections. The collection names should be of the + // format "system.drop.<optime>.<collectionName>", where 'optime' is the optime of the + // collection drop operation, encoded as a string, and 'collectionName' is the original + // collection name. 
+ function listDropPendingCollections(database) { + var pendingDropRegex = new RegExp("system\.drop\..*\." + collNameToDrop + "$"); + var collections = listCollections(database, {includePendingDrops: true}); + return collections.filter(c => pendingDropRegex.test(c.name)); + } + + // Returns a list of all collection names in a given database. + function listCollectionNames(database, args) { + return listCollections(database, args).map(c => c.name); + } + + // Sets a fail point on a specified node. + function setFailPoint(node, failpoint, mode) { + assert.commandWorked(node.adminCommand({configureFailPoint: failpoint, mode: mode})); + } + + var dbNameToDrop = 'dbToDrop'; + var replTest = new ReplSetTest({nodes: [{}, {}, {arbiter: true}]}); + + // Initiate the replica set. + replTest.startSet(); + replTest.initiate(); + replTest.awaitReplication(); + + var primary = replTest.getPrimary(); + var secondary = replTest.getSecondary(); + + var dbToDrop = primary.getDB(dbNameToDrop); + var collNameToDrop = "collectionToDrop"; + + // Create the collection that will be dropped and let it replicate. + var collToDrop = dbToDrop.getCollection(collNameToDrop); + assert.writeOK( + collToDrop.insert({_id: 0}, {writeConcern: {w: 2, wtimeout: replTest.kDefaultTimeoutMS}})); + assert.eq(1, collToDrop.find().itcount()); + + // Pause application on secondary so that commit point doesn't advance, meaning that a dropped + // database on the primary will remain in 'drop-pending' state. + jsTestLog("Pausing oplog application on the secondary node."); + setFailPoint(secondary, "rsSyncApplyStop", "alwaysOn"); + + // Make sure the collection was created. + assert.contains(collNameToDrop, + listCollectionNames(dbToDrop), + "Collection '" + collNameToDrop + "' wasn't created properly"); + + /** + * DROP DATABASE 'Collections' PHASE + */ + + // Drop the database on the primary from a parallel shell. 
+ var dropDatabaseFn = function() { + var dbNameToDrop = 'dbToDrop'; + var primary = db.getMongo(); + jsTestLog( + 'Dropping database ' + dbNameToDrop + ' on primary node ' + primary.host + + '. This command will block because oplog application is paused on the secondary.'); + var dbToDrop = db.getSiblingDB(dbNameToDrop); + assert.commandWorked(dbToDrop.dropDatabase()); + jsTestLog('Database ' + dbNameToDrop + ' successfully dropped on primary node ' + + primary.host); + }; + var dropDatabaseProcess = startParallelShell(dropDatabaseFn, primary.port); + + // Check that primary has started two phase drop of the collection. + jsTestLog('Waiting for primary ' + primary.host + ' to prepare two phase drop of collection ' + + collToDrop.getFullName()); + assert.soonNoExcept( + function() { + return collToDrop.find().itcount() == 0; + }, + 'Primary ' + primary.host + ' failed to prepare two phase drop of collection ' + + collToDrop.getFullName()); + var dropPendingCollections = listDropPendingCollections(dbToDrop); + assert.eq(1, + dropPendingCollections.length, + "Collection was not found in the 'system.drop' namespace. " + + "Full drop-pending collection list: " + tojson(dropPendingCollections)); + jsTestLog('Primary ' + primary.host + ' successfully started two phase drop of collection ' + + collToDrop.getFullName()); + + // Collection creation should fail with an error of ErrorCodes.DatabaseDropPending while the + // database is in a drop pending state. + assert.commandFailedWithCode( + dbToDrop.createCollection('collectionToCreateWhileDroppingDatabase'), + ErrorCodes.DatabaseDropPending, + 'collection creation should fail while we are in the process of dropping the database'); + + /** + * DROP DATABASE 'Database' PHASE + */ + + // Let the secondary apply the collection drop operation, so that the replica set commit point + // will advance, and the 'Database' phase of the database drop will complete on the primary. 
+ jsTestLog("Restarting oplog application on the secondary node."); + setFailPoint(secondary, "rsSyncApplyStop", "off"); + + jsTestLog("Waiting for collection drop operation to replicate to all nodes."); + replTest.awaitReplication(); + + // Make sure the collection has been fully dropped. It should not appear as + // a normal collection or under the 'system.drop' namespace any longer. Physical collection + // drops may happen asynchronously, any time after the drop operation is committed, so we wait + // to make sure the collection is eventually dropped. + assert.soonNoExcept(function() { + var dropPendingCollections = listDropPendingCollections(dbToDrop); + jsTestLog('Drop pending collections: ' + tojson(dropPendingCollections)); + return dropPendingCollections.length == 0; + }); + + jsTestLog('Waiting for dropDatabase command on ' + primary.host + ' to complete.'); + var exitCode = dropDatabaseProcess(); + assert.eq(0, exitCode, 'dropDatabase command on ' + primary.host + ' failed.'); + jsTestLog('Completed dropDatabase command on ' + primary.host); + + replTest.stopSet(); +}()); diff --git a/src/mongo/db/catalog/drop_database.cpp b/src/mongo/db/catalog/drop_database.cpp index 90e9adb34f7..8ccc1c8f944 100644 --- a/src/mongo/db/catalog/drop_database.cpp +++ b/src/mongo/db/catalog/drop_database.cpp @@ -41,11 +41,42 @@ #include "mongo/db/op_observer.h" #include "mongo/db/repl/replication_coordinator.h" #include "mongo/db/service_context.h" +#include "mongo/db/write_concern_options.h" #include "mongo/util/log.h" #include "mongo/util/scopeguard.h" namespace mongo { +namespace { + +// This is used to wait for the collection drops to replicate to a majority of the replica set. +// Note: Even though we're setting UNSET here, kMajority implies JOURNAL if journaling is supported +// by mongod and writeConcernMajorityJournalDefault is set to true in the ReplSetConfig. 
+const WriteConcernOptions kDropDatabaseWriteConcern(WriteConcernOptions::kMajority, + WriteConcernOptions::SyncMode::UNSET, + Minutes(10)); + +/** + * Removes database from catalog and writes dropDatabase entry to oplog. + */ +Status _finishDropDatabase(OperationContext* opCtx, const std::string& dbName, Database* db) { + // If Database::dropDatabase() fails, we should reset the drop-pending state on Database. + auto dropPendingGuard = MakeGuard([db, opCtx] { db->setDropPending(opCtx, false); }); + + Database::dropDatabase(opCtx, db); + dropPendingGuard.Dismiss(); + + log() << "dropDatabase " << dbName << " - finished"; + + WriteUnitOfWork wunit(opCtx); + getGlobalServiceContext()->getOpObserver()->onDropDatabase(opCtx, dbName); + wunit.commit(); + + return Status::OK(); +} + +} // namespace + Status dropDatabase(OperationContext* opCtx, const std::string& dbName) { uassert(ErrorCodes::IllegalOperation, "Cannot drop a database in read-only mode", @@ -57,6 +88,9 @@ Status dropDatabase(OperationContext* opCtx, const std::string& dbName) { CurOp::get(opCtx)->setNS_inlock(dbName); } + auto replCoord = repl::ReplicationCoordinator::get(opCtx); + std::size_t numCollectionsToDrop = 0; + MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN { Lock::GlobalWrite lk(opCtx); AutoGetDb autoDB(opCtx, dbName, MODE_X); @@ -67,7 +101,6 @@ Status dropDatabase(OperationContext* opCtx, const std::string& dbName) { << " because it does not exist"); } - auto replCoord = repl::ReplicationCoordinator::get(opCtx); bool userInitiatedWritesAndNotPrimary = opCtx->writesAreReplicated() && !replCoord->canAcceptWritesForDatabase(opCtx, dbName); @@ -79,7 +112,8 @@ Status dropDatabase(OperationContext* opCtx, const std::string& dbName) { log() << "dropDatabase " << dbName << " - starting"; db->setDropPending(opCtx, true); - // If Database::dropDatabase() fails, we should reset the drop-pending state on Database. 
+ // If Database::dropCollectionEvenIfSystem() fails, we should reset the drop-pending state + // on Database. auto dropPendingGuard = MakeGuard([&db, opCtx] { db->setDropPending(opCtx, false); }); for (auto collection : *db) { @@ -91,20 +125,57 @@ Status dropDatabase(OperationContext* opCtx, const std::string& dbName) { WriteUnitOfWork wunit(opCtx); fassertStatusOK(40476, db->dropCollectionEvenIfSystem(opCtx, nss)); wunit.commit(); + numCollectionsToDrop++; } - Database::dropDatabase(opCtx, db); dropPendingGuard.Dismiss(); - log() << "dropDatabase " << dbName << " - finished"; - WriteUnitOfWork wunit(opCtx); + // If there are no collection drops to wait for, we complete the drop database operation. + if (numCollectionsToDrop == 0U) { + return _finishDropDatabase(opCtx, dbName, db); + } + } + MONGO_WRITE_CONFLICT_RETRY_LOOP_END(opCtx, "dropDatabase_collection", dbName); + + // If awaitReplicationOfLastOpForClient() returns an error or throws an exception, we should + // reset the drop-pending state on Database. + auto dropPendingGuardWhileAwaitingReplication = MakeGuard([dbName, opCtx] { + Lock::GlobalWrite lk(opCtx); + AutoGetDb autoDB(opCtx, dbName, MODE_X); + if (auto db = autoDB.getDb()) { + db->setDropPending(opCtx, false); + } + }); + + auto status = + replCoord->awaitReplicationOfLastOpForClient(opCtx, kDropDatabaseWriteConcern).status; + if (!status.isOK()) { + return Status(status.code(), + str::stream() << "dropDatabase " << dbName << " failed waiting for " + << numCollectionsToDrop + << " collection drops to replicate: " + << status.reason()); + } - getGlobalServiceContext()->getOpObserver()->onDropDatabase(opCtx, dbName); + log() << "dropDatabase " << dbName << " - successfully dropped " << numCollectionsToDrop + << " collections. 
dropping database"; + dropPendingGuardWhileAwaitingReplication.Dismiss(); - wunit.commit(); + MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN { + Lock::GlobalWrite lk(opCtx); + AutoGetDb autoDB(opCtx, dbName, MODE_X); + if (auto db = autoDB.getDb()) { + return _finishDropDatabase(opCtx, dbName, db); + } + + return Status(ErrorCodes::NamespaceNotFound, + str::stream() << "Could not drop database " << dbName + << " because it does not exist after dropping " + << numCollectionsToDrop + << " collection(s)."); } - MONGO_WRITE_CONFLICT_RETRY_LOOP_END(opCtx, "dropDatabase", dbName); + MONGO_WRITE_CONFLICT_RETRY_LOOP_END(opCtx, "dropDatabase_database", dbName); - return Status::OK(); + MONGO_UNREACHABLE; } } // namespace mongo diff --git a/src/mongo/db/catalog/drop_database_test.cpp b/src/mongo/db/catalog/drop_database_test.cpp index 43a2e4c1dfb..ab83ac01521 100644 --- a/src/mongo/db/catalog/drop_database_test.cpp +++ b/src/mongo/db/catalog/drop_database_test.cpp @@ -50,6 +50,7 @@ #include "mongo/stdx/memory.h" #include "mongo/unittest/unittest.h" #include "mongo/util/assert_util.h" +#include "mongo/util/mongoutils/str.h" namespace { @@ -179,6 +180,17 @@ void _createCollection(OperationContext* opCtx, const NamespaceString& nss) { ASSERT_TRUE(AutoGetCollectionForRead(opCtx, nss).getCollection()); } +/** + * Removes database from catalog, bypassing dropDatabase(). 
+ */ +void _removeDatabaseFromCatalog(OperationContext* opCtx, StringData dbName) { + Lock::GlobalWrite lk(opCtx); + AutoGetDb autoDB(opCtx, dbName, MODE_X); + auto db = autoDB.getDb(); + ASSERT_TRUE(db); + Database::dropDatabase(opCtx, db); +} + TEST_F(DropDatabaseTest, DropDatabaseReturnsNamespaceNotFoundIfDatabaseDoesNotExist) { ASSERT_FALSE(AutoGetDb(_opCtx.get(), _nss.db(), MODE_X).getDb()); ASSERT_EQUALS(ErrorCodes::NamespaceNotFound, dropDatabase(_opCtx.get(), _nss.db().toString())); @@ -274,4 +286,67 @@ TEST_F(DropDatabaseTest, DropDatabaseResetsDropPendingStateOnException) { ASSERT_FALSE(db->isDropPending(_opCtx.get())); } +void _testDropDatabaseResetsDropPendingStateIfAwaitReplicationFails(OperationContext* opCtx, + const NamespaceString& nss, + bool expectDbPresent) { + _createCollection(opCtx, nss); + + ASSERT_TRUE(AutoGetDb(opCtx, nss.db(), MODE_X).getDb()); + + ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, dropDatabase(opCtx, nss.db().toString())); + + AutoGetDb autoDb(opCtx, nss.db(), MODE_X); + auto db = autoDb.getDb(); + if (expectDbPresent) { + ASSERT_TRUE(db); + ASSERT_FALSE(db->isDropPending(opCtx)); + } else { + ASSERT_FALSE(db); + } +} + +TEST_F(DropDatabaseTest, + DropDatabaseResetsDropPendingStateIfAwaitReplicationFailsAndDatabaseIsPresent) { + // Update ReplicationCoordinatorMock so that awaitReplicationOfLastOpForClient() fails. + _replCoord->setAwaitReplicationReturnValueFunction([] { + return repl::ReplicationCoordinator::StatusAndDuration( + Status(ErrorCodes::WriteConcernFailed, ""), Milliseconds(0)); + }); + + _testDropDatabaseResetsDropPendingStateIfAwaitReplicationFails(_opCtx.get(), _nss, true); +} + +TEST_F(DropDatabaseTest, + DropDatabaseResetsDropPendingStateIfAwaitReplicationFailsAndDatabaseIsMissing) { + // Update ReplicationCoordinatorMock so that awaitReplicationOfLastOpForClient() fails. 
+ _replCoord->setAwaitReplicationReturnValueFunction([this] { + _removeDatabaseFromCatalog(_opCtx.get(), _nss.db()); + return repl::ReplicationCoordinator::StatusAndDuration( + Status(ErrorCodes::WriteConcernFailed, ""), Milliseconds(0)); + }); + + _testDropDatabaseResetsDropPendingStateIfAwaitReplicationFails(_opCtx.get(), _nss, false); +} + +TEST_F(DropDatabaseTest, + DropDatabaseReturnsNamespaceNotFoundIfDatabaseIsRemovedAfterCollectionsDropsAreReplicated) { + // Update ReplicationCoordinatorMock so that awaitReplicationOfLastOpForClient() removes the + // database from the catalog and then succeeds. + _replCoord->setAwaitReplicationReturnValueFunction([this] { + _removeDatabaseFromCatalog(_opCtx.get(), _nss.db()); + return repl::ReplicationCoordinator::StatusAndDuration(Status::OK(), Milliseconds(0)); + }); + + _createCollection(_opCtx.get(), _nss); + + ASSERT_TRUE(AutoGetDb(_opCtx.get(), _nss.db(), MODE_X).getDb()); + + auto status = dropDatabase(_opCtx.get(), _nss.db().toString()); + ASSERT_EQUALS(ErrorCodes::NamespaceNotFound, status); + ASSERT_EQUALS(status.reason(), + str::stream() << "Could not drop database " << _nss.db() + << " because it does not exist after dropping 1 collection(s)."); + + ASSERT_FALSE(AutoGetDb(_opCtx.get(), _nss.db(), MODE_X).getDb()); +} + } // namespace |