diff options
-rw-r--r-- | jstests/replsets/rollback_waits_for_bgindex_completion.js | 97 | ||||
-rw-r--r-- | src/mongo/db/background.cpp | 15 | ||||
-rw-r--r-- | src/mongo/db/background.h | 11 | ||||
-rw-r--r-- | src/mongo/db/catalog/index_create_impl.cpp | 13 | ||||
-rw-r--r-- | src/mongo/db/repl/rollback_impl.cpp | 39 | ||||
-rw-r--r-- | src/mongo/db/repl/rollback_impl.h | 15 |
6 files changed, 170 insertions, 20 deletions
diff --git a/jstests/replsets/rollback_waits_for_bgindex_completion.js b/jstests/replsets/rollback_waits_for_bgindex_completion.js new file mode 100644 index 00000000000..4e82ffbcd8a --- /dev/null +++ b/jstests/replsets/rollback_waits_for_bgindex_completion.js @@ -0,0 +1,97 @@ +/** + * Test to ensure that rollback waits for in-progress background index builds to finish before + * starting the rollback process. Only applies to Recoverable Rollback via WiredTiger checkpoints. + */ +(function() { + 'use strict'; + + let skip = true; + + // TODO: Re-enable test once Recoverable Rollback via WT Checkpoints is complete and ready to + // test. See SERVER-32844. + if (skip) { + return; + } + + load('jstests/libs/check_log.js'); + load("jstests/replsets/rslib.js"); + load('jstests/replsets/libs/rollback_test.js'); + + const dbName = "dbWithBgIndex"; + const collName = 'coll'; + let bgIndexThread; + + function hangIndexBuildsFailpoint(node, fpMode) { + assert.commandWorked(node.adminCommand( + {configureFailPoint: 'hangAfterStartingIndexBuildUnlocked', mode: fpMode})); + } + + /** + * A function to create a background index on the test collection in a parallel shell. + */ + function createBgIndexFn() { + // Re-define constants, since they are not shared between shells. + const dbName = "dbWithBgIndex"; + const collName = "coll"; + let testDB = db.getSiblingDB(dbName); + jsTestLog("Starting background index build from parallel shell."); + assert.commandWorked(testDB[collName].createIndex({x: 1}, {background: true})); + } + + /** + * Operations that will get replicated to both replica set nodes before rollback. + * + * These common operations are run against the node that will eventually go into rollback, so + * the failpoints will only be enabled on the rollback node. + */ + function CommonOps(node) { + // Create a collection on both data bearing nodes, so we can create an index on it. + const testDB = node.getDB(dbName); + assert.commandWorked(testDB.createCollection(collName)); + + // Hang background index builds. + hangIndexBuildsFailpoint(node, "alwaysOn"); + + jsTestLog("Starting background index build parallel shell."); + bgIndexThread = startParallelShell(createBgIndexFn, node.port); + + // Make sure the index build started and hit the failpoint. + jsTestLog("Waiting for background index build to start and hang due to failpoint."); + checkLog.contains(node, "build index on: " + testDB[collName].getFullName()); + checkLog.contains(node, "Hanging index build with no locks"); + } + + const rollbackTest = new RollbackTest(); + const originalPrimary = rollbackTest.getPrimary(); + CommonOps(originalPrimary); + + // Insert a document so that there is an operation to rollback. + const rollbackNode = rollbackTest.transitionToRollbackOperations(); + assert.writeOK(rollbackNode.getDB(dbName)["rollbackColl"].insert({x: 1})); + + // Allow rollback to start. There are no sync source ops. + rollbackTest.transitionToSyncSourceOperationsBeforeRollback(); + rollbackTest.transitionToSyncSourceOperationsDuringRollback(); + + // Make sure that rollback is hung waiting for the background index operation to complete. + jsTestLog("Waiting for rollback to block on the background index build completion."); + let msg1 = "Waiting for all background operations to complete before starting rollback"; + let msg2 = "Waiting for 1 background operations to complete on database '" + dbName + "'"; + checkLog.contains(rollbackNode, msg1); + checkLog.contains(rollbackNode, msg2); + + // Now turn off the index build failpoint, allowing rollback to continue and finish. + jsTestLog( + "Disabling 'hangAfterStartingIndexBuildUnlocked' failpoint on the rollback node so background index build can complete."); + hangIndexBuildsFailpoint(rollbackNode, "off"); + + // Make sure the background index build completed before rollback started. + checkLog.contains(rollbackNode, + "Finished waiting for background operations to complete before rollback"); + + // Wait for rollback to finish. + rollbackTest.transitionToSteadyStateOperations(); + + // Check the replica set. + rollbackTest.stop(); +}()); diff --git a/src/mongo/db/background.cpp b/src/mongo/db/background.cpp index 555062f7e63..d427c4db301 100644 --- a/src/mongo/db/background.cpp +++ b/src/mongo/db/background.cpp @@ -124,6 +124,14 @@ bool BackgroundOperation::inProgForDb(StringData db) { return dbsInProg.find(db) != dbsInProg.end(); } +int BackgroundOperation::numInProgForDb(StringData db) { + stdx::lock_guard<stdx::mutex> lk(m); + std::shared_ptr<BgInfo> bgInfo = mapFindWithDefault(dbsInProg, db, std::shared_ptr<BgInfo>()); + if (!bgInfo) + return 0; + return bgInfo->getOpsInProgCount(); +} + bool BackgroundOperation::inProgForNs(StringData ns) { stdx::lock_guard<stdx::mutex> lk(m); return nsInProg.find(ns) != nsInProg.end(); @@ -147,13 +155,6 @@ void BackgroundOperation::assertNoBgOpInProgForNs(StringData ns) { !inProgForNs(ns)); } -void BackgroundOperation::awaitNoBgOpInProgForDbs(std::vector<StringData> dbs) { - stdx::unique_lock<stdx::mutex> lk(m); - for (auto db : dbs) { - awaitNoBgOps(lk, &dbsInProg, db); - } -} - void BackgroundOperation::awaitNoBgOpInProgForDb(StringData db) { stdx::unique_lock<stdx::mutex> lk(m); awaitNoBgOps(lk, &dbsInProg, db); diff --git a/src/mongo/db/background.h b/src/mongo/db/background.h index 1861e1b5f83..acf0129efe7 100644 --- a/src/mongo/db/background.h +++ b/src/mongo/db/background.h @@ -58,19 +58,10 @@ class BackgroundOperation { public: static bool inProgForDb(StringData db); + static int numInProgForDb(StringData db); static bool inProgForNs(StringData ns); static void assertNoBgOpInProgForDb(StringData db); static void assertNoBgOpInProgForNs(StringData ns); - - /** - * Waits until there are no background operations in progress for all databases in the given - * list. This function assumes that while it is executing, no new background jobs are started on - * any of the given databases. - * - * @param dbs the list of all databases on which to wait for background operations to complete. - */ - static void awaitNoBgOpInProgForDbs(std::vector<StringData> dbs); - static void awaitNoBgOpInProgForDb(StringData db); static void awaitNoBgOpInProgForNs(StringData ns); static void dump(std::ostream&); diff --git a/src/mongo/db/catalog/index_create_impl.cpp b/src/mongo/db/catalog/index_create_impl.cpp index 598fe9fa581..1b93f70d7bd 100644 --- a/src/mongo/db/catalog/index_create_impl.cpp +++ b/src/mongo/db/catalog/index_create_impl.cpp @@ -401,9 +401,16 @@ Status MultiIndexBlockImpl::insertAllDocumentsInCollection(std::set<RecordId>* d "'hangAfterStartingIndexBuildUnlocked' failpoint"; sleepmillis(1000); } - // If we want to support this, we'd need to regrab the lock and be sure that all callers are - // ok with us yielding. They should be for BG indexes, but not for foreground. - invariant(!"the hangAfterStartingIndexBuildUnlocked failpoint can't be turned off"); + + if (_buildInBackground) { + _opCtx->lockState()->restoreLockState(lockInfo); + _opCtx->recoveryUnit()->abandonSnapshot(); + return Status(ErrorCodes::OperationFailed, + "background index build aborted due to failpoint"); + } else { + invariant( + !"the hangAfterStartingIndexBuildUnlocked failpoint can't be turned off for foreground index builds"); + } } progress->finished(); diff --git a/src/mongo/db/repl/rollback_impl.cpp b/src/mongo/db/repl/rollback_impl.cpp index fa03b891526..63dfe97b02b 100644 --- a/src/mongo/db/repl/rollback_impl.cpp +++ b/src/mongo/db/repl/rollback_impl.cpp @@ -32,6 +32,7 @@ #include "mongo/db/repl/rollback_impl.h" +#include "mongo/db/background.h" #include "mongo/db/concurrency/d_concurrency.h" #include "mongo/db/logical_time_validator.h" #include "mongo/db/operation_context.h" @@ -91,6 +92,13 @@ Status RollbackImpl::runRollback(OperationContext* opCtx) { } _listener->onTransitionToRollback(); + // Wait for all background index builds to complete before starting the rollback process. + status = _awaitBgIndexCompletion(opCtx); + if (!status.isOK()) { + return status; + } + _listener->onBgIndexesComplete(); + auto commonPointSW = _findCommonPoint(); if (!commonPointSW.isOK()) { return commonPointSW.getStatus(); @@ -180,6 +188,37 @@ Status RollbackImpl::_transitionToRollback(OperationContext* opCtx) { return Status::OK(); } +Status RollbackImpl::_awaitBgIndexCompletion(OperationContext* opCtx) { + invariant(opCtx); + if (_isInShutdown()) { + return Status(ErrorCodes::ShutdownInProgress, "rollback shutting down"); + } + + // Get a list of all databases. + StorageEngine* storageEngine = opCtx->getServiceContext()->getGlobalStorageEngine(); + std::vector<std::string> dbs; + { + Lock::GlobalLock lk(opCtx, MODE_IS, UINT_MAX); + storageEngine->listDatabases(&dbs); + } + + // Wait for all background operations to complete by waiting on each database. + std::vector<StringData> dbNames(dbs.begin(), dbs.end()); + log() << "Waiting for all background operations to complete before starting rollback"; + for (auto db : dbNames) { + LOG(1) << "Waiting for " << BackgroundOperation::numInProgForDb(db) + << " background operations to complete on database '" << db << "'"; + BackgroundOperation::awaitNoBgOpInProgForDb(db); + // Check for shutdown again. + if (_isInShutdown()) { + return Status(ErrorCodes::ShutdownInProgress, "rollback shutting down"); + } + } + + log() << "Finished waiting for background operations to complete before rollback"; + return Status::OK(); +} + StatusWith<Timestamp> RollbackImpl::_findCommonPoint() { if (_isInShutdown()) { return Status(ErrorCodes::ShutdownInProgress, "rollback shutting down"); diff --git a/src/mongo/db/repl/rollback_impl.h b/src/mongo/db/repl/rollback_impl.h index b8a6b5b04cf..c88899e70b0 100644 --- a/src/mongo/db/repl/rollback_impl.h +++ b/src/mongo/db/repl/rollback_impl.h @@ -100,6 +100,11 @@ public: virtual void onTransitionToRollback() noexcept {} /** + * Function called after all background index builds have completed. + */ + virtual void onBgIndexesComplete() noexcept {} + + /** * Function called after we find the common point. */ virtual void onCommonPointFound(Timestamp commonPoint) noexcept {} @@ -169,6 +174,16 @@ private: Status _transitionToRollback(OperationContext* opCtx); /** + * Waits for any in-progress background index builds to complete. We do this before beginning + * the rollback process to prevent any issues surrounding index builds pausing/resuming around a + * call to 'recoverToStableTimestamp'. It's not clear that an index build, resumed in this way, + * that continues until completion, would be consistent with the collection data. Waiting for + * all background index builds to complete is a conservative approach, to avoid any of these + * potential issues. + */ + Status _awaitBgIndexCompletion(OperationContext* opCtx); + + /** * Recovers to the stable timestamp while holding the global exclusive lock. */ Status _recoverToStableTimestamp(OperationContext* opCtx); |