From a945fb4697af6e2360ddcdd4eee2b73a0f42469f Mon Sep 17 00:00:00 2001
From: Kevin Pulo
Date: Wed, 21 Feb 2018 03:08:22 +0000
Subject: SERVER-29807 Make CollectionRangeDeleter wait for w:majority after
 each range

That is, wait once after the range has finished being deleted - rather than
waiting after each batch of ~100 document deletions (while yielding).

Also log before and after waiting for majority replication.
---
 src/mongo/db/s/collection_range_deleter.cpp | 100 +++++++++++++++-------------
 1 file changed, 54 insertions(+), 46 deletions(-)

diff --git a/src/mongo/db/s/collection_range_deleter.cpp b/src/mongo/db/s/collection_range_deleter.cpp
index 4ae50230f6d..6c0fc7b0bf6 100644
--- a/src/mongo/db/s/collection_range_deleter.cpp
+++ b/src/mongo/db/s/collection_range_deleter.cpp
@@ -202,67 +202,75 @@ boost::optional<Date_t> CollectionRangeDeleter::cleanUpNextRange(
             wrote = e.toStatus();
             warning() << e.what();
         }
+    }  // drop autoColl
 
-        if (!wrote.isOK() || wrote.getValue() == 0) {
-            if (wrote.isOK()) {
-                LOG(0) << "No documents remain to delete in " << nss << " range "
-                       << redact(range->toString());
-            }
-
-            stdx::lock_guard<stdx::mutex> scopedLock(css->_metadataManager->_managerLock);
-            self->_pop(wrote.getStatus());
-            if (!self->_orphans.empty()) {
-                LOG(1) << "Deleting " << nss.ns() << " range "
-                       << redact(self->_orphans.front().range.toString()) << " next.";
-            }
-
-            return Date_t{};
+    if (!wrote.isOK() || wrote.getValue() == 0) {
+        if (wrote.isOK()) {
+            LOG(0) << "No documents remain to delete in " << nss << " range "
+                   << redact(range->toString());
         }
-    }  // drop autoColl
 
-    invariant(range);
-    invariantOK(wrote.getStatus());
-    invariant(wrote.getValue() > 0);
+        // Wait for majority replication even when wrote isn't OK or == 0, because it might have
+        // been OK and/or > 0 previously, and the deletions must be persistent before notifying
+        // clients in _pop().
 
-    repl::ReplClientInfo::forClient(opCtx->getClient()).setLastOpToSystemLastOpTime(opCtx);
-    const auto clientOpTime = repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp();
+        LOG(0) << "Waiting for majority replication of local deletions in " << nss.ns() << " range "
+               << redact(range->toString());
 
-    // Wait for replication outside the lock
-    const auto status = [&] {
-        try {
-            WriteConcernResult unusedWCResult;
-            return waitForWriteConcern(opCtx, clientOpTime, kMajorityWriteConcern, &unusedWCResult);
-        } catch (const DBException& e) {
-            return e.toStatus();
-        }
-    }();
+        repl::ReplClientInfo::forClient(opCtx->getClient()).setLastOpToSystemLastOpTime(opCtx);
+        const auto clientOpTime = repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp();
 
-    if (!status.isOK()) {
-        LOG(0) << "Error when waiting for write concern after removing " << nss << " range "
-               << redact(range->toString()) << " : " << redact(status.reason());
+        // Wait for replication outside the lock
+        const auto status = [&] {
+            try {
+                WriteConcernResult unusedWCResult;
+                return waitForWriteConcern(
+                    opCtx, clientOpTime, kMajorityWriteConcern, &unusedWCResult);
+            } catch (const DBException& e) {
+                return e.toStatus();
+            }
+        }();
 
+        // Get the lock again to finish off this range (including notifying, if necessary).
         // Don't allow lock interrupts while cleaning up.
         UninterruptibleLockGuard noInterrupt(opCtx->lockState());
         AutoGetCollection autoColl(opCtx, nss, MODE_IX);
         auto* const css = CollectionShardingState::get(opCtx, nss);
-
+        auto* const self = forTestOnly ? forTestOnly : &css->_metadataManager->_rangesToClean;
         stdx::lock_guard<stdx::mutex> scopedLock(css->_metadataManager->_managerLock);
-        auto* const self = &css->_metadataManager->_rangesToClean;
-
-        // If range were already popped (e.g. by dropping nss during the waitForWriteConcern above)
-        // its notification would have been triggered, so this check suffices to ensure that it is
-        // safe to pop the range here
-        if (!notification.ready()) {
-            invariant(!self->isEmpty() && self->_orphans.front().notification == notification);
-            LOG(0) << "Abandoning deletion of latest range in " << nss.ns() << " after "
-                   << wrote.getValue() << " local deletions because of replication failure";
-            self->_pop(status);
+
+        if (!status.isOK()) {
+            LOG(0) << "Error when waiting for write concern after removing " << nss << " range "
+                   << redact(range->toString()) << " : " << redact(status.reason());
+
+            // If range were already popped (e.g. by dropping nss during the waitForWriteConcern
+            // above) its notification would have been triggered, so this check suffices to ensure
+            // that it is safe to pop the range here
+            if (!notification.ready()) {
+                invariant(!self->isEmpty() && self->_orphans.front().notification == notification);
+                LOG(0) << "Abandoning deletion of latest range in " << nss.ns() << " after local "
+                       << "deletions because of replication failure";
+                self->_pop(status);
+            }
+        } else {
+            LOG(0) << "Finished deleting documents in " << nss.ns() << " range "
+                   << redact(range->toString());
+
+            self->_pop(wrote.getStatus());
        }
-    } else {
-        LOG(1) << "Deleted " << wrote.getValue() << " documents in " << nss.ns() << " range "
-               << redact(range->toString());
+
+        if (!self->_orphans.empty()) {
+            LOG(1) << "Deleting " << nss.ns() << " range "
+                   << redact(self->_orphans.front().range.toString()) << " next.";
+        }
+
+        return Date_t{};
     }
 
+    invariant(range);
+    invariantOK(wrote.getStatus());
+    invariant(wrote.getValue() > 0);
+
     notification.abandon();
     return Date_t{};
 }
--
cgit v1.2.1
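
Aside: the control-flow change described in the commit message (one w:majority wait per finished range, instead of one after every ~100-document batch) can be pictured with the minimal standalone sketch below. The helpers deleteBatch() and waitForMajorityReplication() are illustrative placeholders standing in for _doDeletion() and waitForWriteConcern(); they are not MongoDB APIs, and the plain loops compress what the real code spreads across repeated invocations of cleanUpNextRange().

    #include <iostream>

    // Placeholder helpers, not MongoDB functions.
    int deleteBatch(int& remaining, int maxToDelete) {
        const int deleted = remaining < maxToDelete ? remaining : maxToDelete;
        remaining -= deleted;
        return deleted;  // documents removed in this pass
    }

    void waitForMajorityReplication() {
        std::cout << "  ...waiting for w:majority...\n";
    }

    int main() {
        const int maxToDelete = 100;

        // Old behaviour: wait for majority replication after every batch.
        int remaining = 250;
        while (int wrote = deleteBatch(remaining, maxToDelete)) {
            std::cout << "deleted " << wrote << " documents\n";
            waitForMajorityReplication();  // once per ~100-document batch
        }

        // New behaviour (this patch): delete all batches first, then wait once
        // for the whole range before popping/notifying waiters.
        remaining = 250;
        while (int wrote = deleteBatch(remaining, maxToDelete)) {
            std::cout << "deleted " << wrote << " documents\n";
        }
        waitForMajorityReplication();  // once per range
        std::cout << "range finished; safe to notify\n";
        return 0;
    }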