summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--jstests/replsets/drop_databases_two_phase.js149
-rw-r--r--src/mongo/db/catalog/drop_database.cpp89
-rw-r--r--src/mongo/db/catalog/drop_database_test.cpp75
3 files changed, 304 insertions, 9 deletions
diff --git a/jstests/replsets/drop_databases_two_phase.js b/jstests/replsets/drop_databases_two_phase.js
new file mode 100644
index 00000000000..aa4aae61b82
--- /dev/null
+++ b/jstests/replsets/drop_databases_two_phase.js
@@ -0,0 +1,149 @@
+/**
+ * Test to ensure that two phase drop behavior for databases on replica sets works properly.
+ *
+ * Uses a 3 node replica set with one arbiter to verify both phases of a 2-phase database drop:
+ * the 'Collections' and 'Database' phase. Executing a 'dropDatabase' command should put that
+ * database into a drop-pending state. In this state, all new collection creation requests will
+ * be rejected with an error with the code ErrorCodes.DatabaseDropPending. We will exit the
+ * 'Collections' phase once the last collection drop has been propagated to a majority. All
+ * collections in the database will be physically dropped at this point.
+ *
+ * During the 'Database' phase, collection creation is still disallowed. This phase removes the
+ * metadata for the database from the server and appends the 'dropDatabase' operation to the oplog.
+ * Unlike the 'Collections' phase, we do not wait for the 'dropDatabase' to propagate to a majority
+ * unless explicitly requested by the user with a write concern.
+ */
+
+(function() {
+ "use strict";
+
+ // Returns a list of all collections in a given database. Use 'args' as the
+ // 'listCollections' command arguments.
+ function listCollections(database, args) {
+ var args = args || {};
+ var failMsg = "'listCollections' command failed";
+ var res = assert.commandWorked(database.runCommand("listCollections", args), failMsg);
+ return res.cursor.firstBatch;
+ }
+
+ // Returns a list of 'drop-pending' collections. The collection names should be of the
+ // format "system.drop.<optime>.<collectionName>", where 'optime' is the optime of the
+ // collection drop operation, encoded as a string, and 'collectionName' is the original
+ // collection name.
+ function listDropPendingCollections(database) {
+ var pendingDropRegex = new RegExp("system\.drop\..*\." + collNameToDrop + "$");
+ var collections = listCollections(database, {includePendingDrops: true});
+ return collections.filter(c => pendingDropRegex.test(c.name));
+ }
+
+ // Returns a list of all collection names in a given database.
+ function listCollectionNames(database, args) {
+ return listCollections(database, args).map(c => c.name);
+ }
+
+ // Sets a fail point on a specified node.
+ function setFailPoint(node, failpoint, mode) {
+ assert.commandWorked(node.adminCommand({configureFailPoint: failpoint, mode: mode}));
+ }
+
+ var dbNameToDrop = 'dbToDrop';
+ var replTest = new ReplSetTest({nodes: [{}, {}, {arbiter: true}]});
+
+ // Initiate the replica set.
+ replTest.startSet();
+ replTest.initiate();
+ replTest.awaitReplication();
+
+ var primary = replTest.getPrimary();
+ var secondary = replTest.getSecondary();
+
+ var dbToDrop = primary.getDB(dbNameToDrop);
+ var collNameToDrop = "collectionToDrop";
+
+ // Create the collection that will be dropped and let it replicate.
+ var collToDrop = dbToDrop.getCollection(collNameToDrop);
+ assert.writeOK(
+ collToDrop.insert({_id: 0}, {writeConcern: {w: 2, wtimeout: replTest.kDefaultTimeoutMS}}));
+ assert.eq(1, collToDrop.find().itcount());
+
+ // Pause application on secondary so that commit point doesn't advance, meaning that a dropped
+ // database on the primary will remain in 'drop-pending' state.
+ jsTestLog("Pausing oplog application on the secondary node.");
+ setFailPoint(secondary, "rsSyncApplyStop", "alwaysOn");
+
+ // Make sure the collection was created.
+ assert.contains(collNameToDrop,
+ listCollectionNames(dbToDrop),
+ "Collection '" + collNameToDrop + "' wasn't created properly");
+
+ /**
+ * DROP DATABASE 'Collections' PHASE
+ */
+
+ // Drop the database on the primary from a parallel shell (see startParallelShell below).
+ var dropDatabaseFn = function() {
+ var dbNameToDrop = 'dbToDrop';
+ var primary = db.getMongo();
+ jsTestLog(
+ 'Dropping database ' + dbNameToDrop + ' on primary node ' + primary.host +
+ '. This command will block because oplog application is paused on the secondary.');
+ var dbToDrop = db.getSiblingDB(dbNameToDrop);
+ assert.commandWorked(dbToDrop.dropDatabase());
+ jsTestLog('Database ' + dbNameToDrop + ' successfully dropped on primary node ' +
+ primary.host);
+ };
+ var dropDatabaseProcess = startParallelShell(dropDatabaseFn, primary.port);
+
+ // Check that primary has started two phase drop of the collection.
+ jsTestLog('Waiting for primary ' + primary.host + ' to prepare two phase drop of collection ' +
+ collToDrop.getFullName());
+ assert.soonNoExcept(
+ function() {
+ return collToDrop.find().itcount() == 0;
+ },
+ 'Primary ' + primary.host + ' failed to prepare two phase drop of collection ' +
+ collToDrop.getFullName());
+ var dropPendingCollections = listDropPendingCollections(dbToDrop);
+ assert.eq(1,
+ dropPendingCollections.length,
+ "Collection was not found in the 'system.drop' namespace. " +
+ "Full drop-pending collection list: " + tojson(dropPendingCollections));
+ jsTestLog('Primary ' + primary.host + ' successfully started two phase drop of collection ' +
+ collToDrop.getFullName());
+
+ // Collection creation should fail with an error of ErrorCodes.DatabaseDropPending while the
+ // database is in a drop pending state.
+ assert.commandFailedWithCode(
+ dbToDrop.createCollection('collectionToCreateWhileDroppingDatabase'),
+ ErrorCodes.DatabaseDropPending,
+ 'collection creation should fail while we are in the process of dropping the database');
+
+ /**
+ * DROP DATABASE 'Database' PHASE
+ */
+
+ // Let the secondary apply the collection drop operation, so that the replica set commit point
+ // will advance, and the 'Database' phase of the database drop will complete on the primary.
+ jsTestLog("Restarting oplog application on the secondary node.");
+ setFailPoint(secondary, "rsSyncApplyStop", "off");
+
+ jsTestLog("Waiting for collection drop operation to replicate to all nodes.");
+ replTest.awaitReplication();
+
+ // Make sure the collection has been fully dropped. It should not appear as
+ // a normal collection or under the 'system.drop' namespace any longer. Physical collection
+ // drops may happen asynchronously, any time after the drop operation is committed, so we wait
+ // to make sure the collection is eventually dropped.
+ assert.soonNoExcept(function() {
+ var dropPendingCollections = listDropPendingCollections(dbToDrop);
+ jsTestLog('Drop pending collections: ' + tojson(dropPendingCollections));
+ return dropPendingCollections.length == 0;
+ });
+
+ jsTestLog('Waiting for dropDatabase command on ' + primary.host + ' to complete.');
+ var exitCode = dropDatabaseProcess();
+ assert.eq(0, exitCode, 'dropDatabase command on ' + primary.host + ' failed.');
+ jsTestLog('Completed dropDatabase command on ' + primary.host);
+
+ replTest.stopSet();
+}());
diff --git a/src/mongo/db/catalog/drop_database.cpp b/src/mongo/db/catalog/drop_database.cpp
index 90e9adb34f7..8ccc1c8f944 100644
--- a/src/mongo/db/catalog/drop_database.cpp
+++ b/src/mongo/db/catalog/drop_database.cpp
@@ -41,11 +41,42 @@
#include "mongo/db/op_observer.h"
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/service_context.h"
+#include "mongo/db/write_concern_options.h"
#include "mongo/util/log.h"
#include "mongo/util/scopeguard.h"
namespace mongo {
+namespace {
+
+// This is used to wait for the collection drops to replicate to a majority of the replica set.
+// Note: Even though we're setting UNSET here, kMajority implies JOURNAL if journaling is supported
+// by mongod and writeConcernMajorityJournalDefault is set to true in the ReplSetConfig.
+const WriteConcernOptions kDropDatabaseWriteConcern(WriteConcernOptions::kMajority,
+ WriteConcernOptions::SyncMode::UNSET,
+ Minutes(10));
+
+/**
+ * Finishes a database drop: removes 'db' from the catalog, logs completion, and then notifies the
+ * OpObserver of the dropDatabase inside its own WriteUnitOfWork. Returns Status::OK().
+ */
+Status _finishDropDatabase(OperationContext* opCtx, const std::string& dbName, Database* db) {
+    // Clear the drop-pending flag again if this scope is left before Database::dropDatabase()
+    // has completed.
+    auto resetDropPending = MakeGuard([db, opCtx] { db->setDropPending(opCtx, false); });
+    Database::dropDatabase(opCtx, db);
+    resetDropPending.Dismiss();
+
+    log() << "dropDatabase " << dbName << " - finished";
+
+    WriteUnitOfWork wuow(opCtx);
+    auto opObserver = getGlobalServiceContext()->getOpObserver();
+    opObserver->onDropDatabase(opCtx, dbName);
+    wuow.commit();
+
+    return Status::OK();
+}
+
+} // namespace
+
Status dropDatabase(OperationContext* opCtx, const std::string& dbName) {
uassert(ErrorCodes::IllegalOperation,
"Cannot drop a database in read-only mode",
@@ -57,6 +88,9 @@ Status dropDatabase(OperationContext* opCtx, const std::string& dbName) {
CurOp::get(opCtx)->setNS_inlock(dbName);
}
+ auto replCoord = repl::ReplicationCoordinator::get(opCtx);
+ std::size_t numCollectionsToDrop = 0;
+
MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
Lock::GlobalWrite lk(opCtx);
AutoGetDb autoDB(opCtx, dbName, MODE_X);
@@ -67,7 +101,6 @@ Status dropDatabase(OperationContext* opCtx, const std::string& dbName) {
<< " because it does not exist");
}
- auto replCoord = repl::ReplicationCoordinator::get(opCtx);
bool userInitiatedWritesAndNotPrimary =
opCtx->writesAreReplicated() && !replCoord->canAcceptWritesForDatabase(opCtx, dbName);
@@ -79,7 +112,8 @@ Status dropDatabase(OperationContext* opCtx, const std::string& dbName) {
log() << "dropDatabase " << dbName << " - starting";
db->setDropPending(opCtx, true);
- // If Database::dropDatabase() fails, we should reset the drop-pending state on Database.
+ // If Database::dropCollectionEvenIfSystem() fails, we should reset the drop-pending state
+ // on Database.
auto dropPendingGuard = MakeGuard([&db, opCtx] { db->setDropPending(opCtx, false); });
for (auto collection : *db) {
@@ -91,20 +125,57 @@ Status dropDatabase(OperationContext* opCtx, const std::string& dbName) {
WriteUnitOfWork wunit(opCtx);
fassertStatusOK(40476, db->dropCollectionEvenIfSystem(opCtx, nss));
wunit.commit();
+ numCollectionsToDrop++;
}
- Database::dropDatabase(opCtx, db);
dropPendingGuard.Dismiss();
- log() << "dropDatabase " << dbName << " - finished";
- WriteUnitOfWork wunit(opCtx);
+ // If there are no collection drops to wait for, we complete the drop database operation.
+ if (numCollectionsToDrop == 0U) {
+ return _finishDropDatabase(opCtx, dbName, db);
+ }
+ }
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(opCtx, "dropDatabase_collection", dbName);
+
+ // If waitForWriteConcern() returns an error or throws an exception, we should reset the
+ // drop-pending state on Database.
+ auto dropPendingGuardWhileAwaitingReplication = MakeGuard([dbName, opCtx] {
+ Lock::GlobalWrite lk(opCtx);
+ AutoGetDb autoDB(opCtx, dbName, MODE_X);
+ if (auto db = autoDB.getDb()) {
+ db->setDropPending(opCtx, false);
+ }
+ });
+
+ auto status =
+ replCoord->awaitReplicationOfLastOpForClient(opCtx, kDropDatabaseWriteConcern).status;
+ if (!status.isOK()) {
+ return Status(status.code(),
+ str::stream() << "dropDatabase " << dbName << " failed waiting for "
+ << numCollectionsToDrop
+ << " collection drops to replicate: "
+ << status.reason());
+ }
- getGlobalServiceContext()->getOpObserver()->onDropDatabase(opCtx, dbName);
+ log() << "dropDatabase " << dbName << " - successfully dropped " << numCollectionsToDrop
+ << " collections. dropping database";
+ dropPendingGuardWhileAwaitingReplication.Dismiss();
- wunit.commit();
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ Lock::GlobalWrite lk(opCtx);
+ AutoGetDb autoDB(opCtx, dbName, MODE_X);
+ if (auto db = autoDB.getDb()) {
+ return _finishDropDatabase(opCtx, dbName, db);
+ }
+
+ return Status(ErrorCodes::NamespaceNotFound,
+ str::stream() << "Could not drop database " << dbName
+ << " because it does not exist after dropping "
+ << numCollectionsToDrop
+ << " collection(s).");
}
- MONGO_WRITE_CONFLICT_RETRY_LOOP_END(opCtx, "dropDatabase", dbName);
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(opCtx, "dropDatabase_database", dbName);
- return Status::OK();
+ MONGO_UNREACHABLE;
}
} // namespace mongo
diff --git a/src/mongo/db/catalog/drop_database_test.cpp b/src/mongo/db/catalog/drop_database_test.cpp
index 43a2e4c1dfb..ab83ac01521 100644
--- a/src/mongo/db/catalog/drop_database_test.cpp
+++ b/src/mongo/db/catalog/drop_database_test.cpp
@@ -50,6 +50,7 @@
#include "mongo/stdx/memory.h"
#include "mongo/unittest/unittest.h"
#include "mongo/util/assert_util.h"
+#include "mongo/util/mongoutils/str.h"
namespace {
@@ -179,6 +180,17 @@ void _createCollection(OperationContext* opCtx, const NamespaceString& nss) {
ASSERT_TRUE(AutoGetCollectionForRead(opCtx, nss).getCollection());
}
+/**
+ * Drops the database 'dbName' directly through Database::dropDatabase(), without going through
+ * dropDatabase(). Asserts that the database exists beforehand.
+ */
+void _removeDatabaseFromCatalog(OperationContext* opCtx, StringData dbName) {
+    Lock::GlobalWrite globalLock(opCtx);
+    AutoGetDb dbAccessor(opCtx, dbName, MODE_X);
+    auto db = dbAccessor.getDb();
+    ASSERT_TRUE(db);
+    Database::dropDatabase(opCtx, db);
+}
+
TEST_F(DropDatabaseTest, DropDatabaseReturnsNamespaceNotFoundIfDatabaseDoesNotExist) {
ASSERT_FALSE(AutoGetDb(_opCtx.get(), _nss.db(), MODE_X).getDb());
ASSERT_EQUALS(ErrorCodes::NamespaceNotFound, dropDatabase(_opCtx.get(), _nss.db().toString()));
@@ -274,4 +286,67 @@ TEST_F(DropDatabaseTest, DropDatabaseResetsDropPendingStateOnException) {
ASSERT_FALSE(db->isDropPending(_opCtx.get()));
}
+/**
+ * Creates 'nss', runs dropDatabase() expecting ErrorCodes::WriteConcernFailed, and then inspects
+ * the database. When 'expectDbPresent' is true the database must still exist with its
+ * drop-pending state cleared; otherwise it must no longer exist.
+ */
+void _testDropDatabaseResetsDropPendingStateIfAwaitReplicationFails(OperationContext* opCtx,
+                                                                    const NamespaceString& nss,
+                                                                    bool expectDbPresent) {
+    _createCollection(opCtx, nss);
+    ASSERT_TRUE(AutoGetDb(opCtx, nss.db(), MODE_X).getDb());
+
+    ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, dropDatabase(opCtx, nss.db().toString()));
+
+    AutoGetDb dbAfterDrop(opCtx, nss.db(), MODE_X);
+    auto db = dbAfterDrop.getDb();
+    if (!expectDbPresent) {
+        ASSERT_FALSE(db);
+        return;
+    }
+
+    ASSERT_TRUE(db);
+    ASSERT_FALSE(db->isDropPending(opCtx));
+}
+
+TEST_F(DropDatabaseTest,
+       DropDatabaseResetsDropPendingStateIfAwaitReplicationFailsAndDatabaseIsPresent) {
+    // Make the ReplicationCoordinatorMock report a write concern failure from
+    // awaitReplicationOfLastOpForClient().
+    auto failAwaitReplication = [] {
+        return repl::ReplicationCoordinator::StatusAndDuration(
+            Status(ErrorCodes::WriteConcernFailed, ""), Milliseconds(0));
+    };
+    _replCoord->setAwaitReplicationReturnValueFunction(failAwaitReplication);
+
+    const bool expectDbPresent = true;
+    _testDropDatabaseResetsDropPendingStateIfAwaitReplicationFails(
+        _opCtx.get(), _nss, expectDbPresent);
+}
+
+TEST_F(DropDatabaseTest,
+       DropDatabaseResetsDropPendingStateIfAwaitReplicationFailsAndDatabaseIsMissing) {
+    // Make the ReplicationCoordinatorMock remove the database from the catalog and then report a
+    // write concern failure from awaitReplicationOfLastOpForClient().
+    auto removeDbAndFailAwaitReplication = [this] {
+        _removeDatabaseFromCatalog(_opCtx.get(), _nss.db());
+        return repl::ReplicationCoordinator::StatusAndDuration(
+            Status(ErrorCodes::WriteConcernFailed, ""), Milliseconds(0));
+    };
+    _replCoord->setAwaitReplicationReturnValueFunction(removeDbAndFailAwaitReplication);
+
+    const bool expectDbPresent = false;
+    _testDropDatabaseResetsDropPendingStateIfAwaitReplicationFails(
+        _opCtx.get(), _nss, expectDbPresent);
+}
+
+TEST_F(DropDatabaseTest,
+ DropDatabaseReturnsNamespaceNotFoundIfDatabaseIsRemovedAfterCollectionsDropsAreReplicated) {
+ // Make awaitReplicationOfLastOpForClient() drop the database from the catalog, then succeed.
+ _replCoord->setAwaitReplicationReturnValueFunction([this] {
+ _removeDatabaseFromCatalog(_opCtx.get(), _nss.db());
+ return repl::ReplicationCoordinator::StatusAndDuration(Status::OK(), Milliseconds(0));
+ });
+
+ _createCollection(_opCtx.get(), _nss);
+
+ ASSERT_TRUE(AutoGetDb(_opCtx.get(), _nss.db(), MODE_X).getDb());
+
+ auto status = dropDatabase(_opCtx.get(), _nss.db().toString());
+ ASSERT_EQUALS(ErrorCodes::NamespaceNotFound, status);
+ ASSERT_EQUALS(status.reason(),
+ str::stream() << "Could not drop database " << _nss.db()
+ << " because it does not exist after dropping 1 collection(s).");
+
+ ASSERT_FALSE(AutoGetDb(_opCtx.get(), _nss.db(), MODE_X).getDb());
+}
+
} // namespace