author	Matthew Russotto <matthew.russotto@10gen.com>	2020-01-27 11:13:14 -0500
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2020-01-28 22:51:06 +0000
commit	681631228dbaa98e00d1ea6d98c00662ef293a2b (patch)
tree	087ad02de3606d57317764bc34d70ff23aa1cfd4 /src/mongo/db/repl/initial_syncer.cpp
parent	f2ea3303ab1116992442102c03371b9e50de789b (diff)
download	mongo-681631228dbaa98e00d1ea6d98c00662ef293a2b.tar.gz
SERVER-45503 Arrange events so cloner onCompletion is never run with lock held
Diffstat (limited to 'src/mongo/db/repl/initial_syncer.cpp')
-rw-r--r--	src/mongo/db/repl/initial_syncer.cpp	60
1 file changed, 34 insertions, 26 deletions
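The commit message above describes the change: the cloner's onCompletion continuation must never run while the caller still holds the InitialSyncer mutex, because the continuation itself needs that mutex. Below is a minimal sketch of the hazard being avoided, using plain std::mutex and std::thread rather than MongoDB's executor and future types; the holder variable and both print messages are illustrative, not part of the real code.

    #include <functional>
    #include <iostream>
    #include <mutex>
    #include <thread>

    int main() {
        std::mutex m;              // stands in for InitialSyncer::_mutex
        std::thread::id holder{};  // which thread holds m (tracked only for this demo)

        // A continuation that must take the mutex, as _allDatabaseClonerCallback does.
        std::function<void()> onCompletion = [&] {
            if (holder == std::this_thread::get_id()) {
                // Re-locking a std::mutex the thread already owns is undefined behavior
                // and typically deadlocks; detect it here instead of hanging the demo.
                std::cout << "ran inline while the lock is held: would self-deadlock\n";
                return;
            }
            std::lock_guard<std::mutex> lk(m);
            std::cout << "ran after the caller dropped the lock: safe\n";
        };

        {
            std::lock_guard<std::mutex> lk(m);
            holder = std::this_thread::get_id();
            // Before this commit: a cloner that finished immediately could run its
            // onCompletion continuation inline, on this thread, with _mutex still held.
            onCompletion();
            holder = {};
        }

        // After this commit: the continuation only runs once the caller has released
        // the lock (modelled here by invoking it from a separate thread afterwards).
        std::thread worker(onCompletion);
        worker.join();
        return 0;
    }

The diff below shows how the real code achieves this: the cloner is prepared under the lock but only started, via an executor event, after the lock is released.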
diff --git a/src/mongo/db/repl/initial_syncer.cpp b/src/mongo/db/repl/initial_syncer.cpp
index 0c9bda496d3..6914434ccfb 100644
--- a/src/mongo/db/repl/initial_syncer.cpp
+++ b/src/mongo/db/repl/initial_syncer.cpp
@@ -1075,35 +1075,43 @@ void InitialSyncer::_fcvFetcherCallback(const StatusWith<Fetcher::QueryResponse>
LOG(2) << "Starting AllDatabaseCloner: " << _initialSyncState->allDatabaseCloner->toString();
- _initialSyncState->allDatabaseClonerFuture =
- _initialSyncState->allDatabaseCloner->runOnExecutor(_clonerExec)
- .onCompletion([this, onCompletionGuard](Status status) mutable {
- // The completion guard must run on the main executor. This only makes a difference
- // for unit tests, but we always schedule it that way to avoid special casing test
- // code.
- stdx::unique_lock<Latch> lock(_mutex);
- auto exec_status = _exec->scheduleWork(
- [this, status, onCompletionGuard](executor::TaskExecutor::CallbackArgs args) {
- _allDatabaseClonerCallback(status, onCompletionGuard);
- });
- if (!exec_status.isOK()) {
- onCompletionGuard->setResultAndCancelRemainingWork_inlock(
- lock, exec_status.getStatus());
- // In the shutdown case, it is possible the completion guard will be run
- // from this thread (since the lambda holding another copy didn't schedule).
- // If it does, we will self-deadlock if we're holding the lock, so release it.
- lock.unlock();
- }
- // In unit tests, this reset ensures the completion guard does not run during the
- // destruction of the lambda (which occurs on the wrong executor), except in the
- // shutdown case.
- onCompletionGuard.reset();
- });
-
- if (!status.isOK()) {
+ auto [startClonerFuture, startCloner] =
+ _initialSyncState->allDatabaseCloner->runOnExecutorEvent(_clonerExec);
+ // runOnExecutorEvent ensures the future is not ready unless an error has occurred.
+ if (startClonerFuture.isReady()) {
+ status = startClonerFuture.getNoThrow();
+ invariant(!status.isOK());
onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock, status);
return;
}
+ _initialSyncState->allDatabaseClonerFuture =
+ std::move(startClonerFuture).onCompletion([this, onCompletionGuard](Status status) mutable {
+ // The completion guard must run on the main executor, and never inline. In unit tests,
+ // without the executor call, it would run on the wrong executor. In both production
+ // and in unit tests, if the cloner finishes very quickly, the callback could run
+ // in-line and result in self-deadlock.
+ stdx::unique_lock<Latch> lock(_mutex);
+ auto exec_status = _exec->scheduleWork(
+ [this, status, onCompletionGuard](executor::TaskExecutor::CallbackArgs args) {
+ _allDatabaseClonerCallback(status, onCompletionGuard);
+ });
+ if (!exec_status.isOK()) {
+ onCompletionGuard->setResultAndCancelRemainingWork_inlock(lock,
+ exec_status.getStatus());
+ // In the shutdown case, it is possible the completion guard will be run
+ // from this thread (since the lambda holding another copy didn't schedule).
+ // If it does, we will self-deadlock if we're holding the lock, so release it.
+ lock.unlock();
+ }
+ // In unit tests, this reset ensures the completion guard does not run during the
+ // destruction of the lambda (which occurs on the wrong executor), except in the
+ // shutdown case.
+ onCompletionGuard.reset();
+ });
+ lock.unlock();
+ // Start (and therefore finish) the cloners outside the lock. This ensures onCompletion
+ // is not run with the mutex held, which would result in self-deadlock.
+ _clonerExec->signalEvent(startCloner);
}
void InitialSyncer::_oplogFetcherCallback(const Status& oplogFetcherFinishStatus,
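The hunk above replaces runOnExecutor with runOnExecutorEvent: the cloner chain is wired up and its onCompletion continuation attached while _mutex is held, but the cloner cannot actually run until _clonerExec->signalEvent(startCloner) fires after the lock is released, so the continuation can never execute under the caller's lock. The following is a minimal, self-contained sketch of that start-via-event pattern using only the standard library; SimpleEvent and the worker thread stand in for MongoDB's TaskExecutor event and executor thread, and are assumptions for illustration only.

    #include <condition_variable>
    #include <functional>
    #include <iostream>
    #include <mutex>
    #include <thread>

    // A tiny manually-signalled event, standing in for the TaskExecutor event
    // returned by runOnExecutorEvent.
    class SimpleEvent {
    public:
        void signal() {
            {
                std::lock_guard<std::mutex> lk(_m);
                _signaled = true;
            }
            _cv.notify_all();
        }
        void wait() {
            std::unique_lock<std::mutex> lk(_m);
            _cv.wait(lk, [&] { return _signaled; });
        }

    private:
        std::mutex _m;
        std::condition_variable _cv;
        bool _signaled = false;
    };

    int main() {
        std::mutex stateMutex;   // plays the role of InitialSyncer::_mutex
        SimpleEvent startCloner; // plays the role of the runOnExecutorEvent event

        // The completion continuation needs stateMutex, as _allDatabaseClonerCallback does.
        std::function<void()> onCompletion = [&] {
            std::lock_guard<std::mutex> lk(stateMutex);
            std::cout << "onCompletion ran without the caller's lock held\n";
        };

        // The "cloner" does nothing until the start event is signalled, so it cannot
        // complete (and invoke onCompletion) while the caller still holds stateMutex.
        std::thread cloner([&] {
            startCloner.wait();
            // ... clone databases ...
            onCompletion();
        });

        {
            std::unique_lock<std::mutex> lk(stateMutex);
            // ... wire up futures and state under the lock, as _fcvFetcherCallback does ...
        }  // drop the lock *before* letting the cloner start

        startCloner.signal();  // analogous to _clonerExec->signalEvent(startCloner)
        cloner.join();
        return 0;
    }

The remaining machinery in the hunk handles the other hazards called out in its comments: the continuation re-schedules _allDatabaseClonerCallback on the main executor so the completion guard never runs on the cloner executor, releases the lock before a failed schedule can run the guard inline, and explicitly resets onCompletionGuard so the guard is not destroyed (and therefore run) during destruction of the lambda on the wrong executor.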