author    Kevin Pulo <kevin.pulo@mongodb.com>    2018-02-21 03:08:22 +0000
committer Kevin Pulo <kevin.pulo@mongodb.com>    2018-03-02 06:56:31 +0000
commit    a945fb4697af6e2360ddcdd4eee2b73a0f42469f (patch)
tree      3c309b9b159d646e438aa6f79a0b238bb359d307 /src
parent    21cd15430f92461afd381b2f34c5582e540d63f3 (diff)
download  mongo-a945fb4697af6e2360ddcdd4eee2b73a0f42469f.tar.gz
SERVER-29807 Make CollectionRangeDeleter wait for w:majority after each range
That is, wait once after the range has finished being deleted - rather than waiting after each batch of ~100 document deletions (while yielding). Also log before and after waiting for majority replication.
Diffstat (limited to 'src')
-rw-r--r--  src/mongo/db/s/collection_range_deleter.cpp | 100
1 file changed, 54 insertions(+), 46 deletions(-)
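
The behavioural change is easiest to see as control flow: the majority-replication wait moves out of the per-batch delete loop and runs once per range, after the last batch. Below is a minimal, self-contained C++ sketch of that shape only; deleteNextBatch and waitForMajorityReplication are hypothetical stand-ins for illustration, not the server's actual APIs.

    #include <algorithm>
    #include <iostream>

    // Hypothetical stand-in: deletes up to `limit` orphaned documents from the
    // range and returns how many it removed (0 when the range is empty).
    static int deleteNextBatch(int limit) {
        static int remaining = 250;            // pretend the range holds 250 documents
        const int n = std::min(limit, remaining);
        remaining -= n;
        return n;
    }

    // Hypothetical stand-in: blocks until the local deletions are majority-replicated.
    static void waitForMajorityReplication() {
        std::cout << "waiting for w:majority once, after the whole range is deleted\n";
    }

    int main() {
        int deleted;
        while ((deleted = deleteNextBatch(100)) > 0) {   // ~100 documents per batch, as before
            std::cout << "deleted batch of " << deleted << " documents\n";
        }
        waitForMajorityReplication();   // moved out of the per-batch loop by this patch
        return 0;
    }
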
diff --git a/src/mongo/db/s/collection_range_deleter.cpp b/src/mongo/db/s/collection_range_deleter.cpp
index 4ae50230f6d..6c0fc7b0bf6 100644
--- a/src/mongo/db/s/collection_range_deleter.cpp
+++ b/src/mongo/db/s/collection_range_deleter.cpp
@@ -202,67 +202,75 @@ boost::optional<Date_t> CollectionRangeDeleter::cleanUpNextRange(
wrote = e.toStatus();
warning() << e.what();
}
+ } // drop autoColl
- if (!wrote.isOK() || wrote.getValue() == 0) {
- if (wrote.isOK()) {
- LOG(0) << "No documents remain to delete in " << nss << " range "
- << redact(range->toString());
- }
-
- stdx::lock_guard<stdx::mutex> scopedLock(css->_metadataManager->_managerLock);
- self->_pop(wrote.getStatus());
- if (!self->_orphans.empty()) {
- LOG(1) << "Deleting " << nss.ns() << " range "
- << redact(self->_orphans.front().range.toString()) << " next.";
- }
-
- return Date_t{};
+ if (!wrote.isOK() || wrote.getValue() == 0) {
+ if (wrote.isOK()) {
+ LOG(0) << "No documents remain to delete in " << nss << " range "
+ << redact(range->toString());
}
- } // drop autoColl
- invariant(range);
- invariantOK(wrote.getStatus());
- invariant(wrote.getValue() > 0);
+ // Wait for majority replication even when wrote isn't OK or == 0, because it might have
+ // been OK and/or > 0 previously, and the deletions must be persistent before notifying
+ // clients in _pop().
- repl::ReplClientInfo::forClient(opCtx->getClient()).setLastOpToSystemLastOpTime(opCtx);
- const auto clientOpTime = repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp();
+ LOG(0) << "Waiting for majority replication of local deletions in " << nss.ns() << " range "
+ << redact(range->toString());
- // Wait for replication outside the lock
- const auto status = [&] {
- try {
- WriteConcernResult unusedWCResult;
- return waitForWriteConcern(opCtx, clientOpTime, kMajorityWriteConcern, &unusedWCResult);
- } catch (const DBException& e) {
- return e.toStatus();
- }
- }();
+ repl::ReplClientInfo::forClient(opCtx->getClient()).setLastOpToSystemLastOpTime(opCtx);
+ const auto clientOpTime = repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp();
- if (!status.isOK()) {
- LOG(0) << "Error when waiting for write concern after removing " << nss << " range "
- << redact(range->toString()) << " : " << redact(status.reason());
+ // Wait for replication outside the lock
+ const auto status = [&] {
+ try {
+ WriteConcernResult unusedWCResult;
+ return waitForWriteConcern(
+ opCtx, clientOpTime, kMajorityWriteConcern, &unusedWCResult);
+ } catch (const DBException& e) {
+ return e.toStatus();
+ }
+ }();
+ // Get the lock again to finish off this range (including notifying, if necessary).
// Don't allow lock interrupts while cleaning up.
UninterruptibleLockGuard noInterrupt(opCtx->lockState());
AutoGetCollection autoColl(opCtx, nss, MODE_IX);
auto* const css = CollectionShardingState::get(opCtx, nss);
-
+ auto* const self = forTestOnly ? forTestOnly : &css->_metadataManager->_rangesToClean;
stdx::lock_guard<stdx::mutex> scopedLock(css->_metadataManager->_managerLock);
- auto* const self = &css->_metadataManager->_rangesToClean;
-
- // If range were already popped (e.g. by dropping nss during the waitForWriteConcern above)
- // its notification would have been triggered, so this check suffices to ensure that it is
- // safe to pop the range here
- if (!notification.ready()) {
- invariant(!self->isEmpty() && self->_orphans.front().notification == notification);
- LOG(0) << "Abandoning deletion of latest range in " << nss.ns() << " after "
- << wrote.getValue() << " local deletions because of replication failure";
- self->_pop(status);
+
+ if (!status.isOK()) {
+ LOG(0) << "Error when waiting for write concern after removing " << nss << " range "
+ << redact(range->toString()) << " : " << redact(status.reason());
+
+ // If range were already popped (e.g. by dropping nss during the waitForWriteConcern
+ // above) its notification would have been triggered, so this check suffices to ensure
+ // that it is safe to pop the range here
+ if (!notification.ready()) {
+ invariant(!self->isEmpty() && self->_orphans.front().notification == notification);
+ LOG(0) << "Abandoning deletion of latest range in " << nss.ns() << " after local "
+ << "deletions because of replication failure";
+ self->_pop(status);
+ }
+ } else {
+ LOG(0) << "Finished deleting documents in " << nss.ns() << " range "
+ << redact(range->toString());
+
+ self->_pop(wrote.getStatus());
}
- } else {
- LOG(1) << "Deleted " << wrote.getValue() << " documents in " << nss.ns() << " range "
- << redact(range->toString());
+
+ if (!self->_orphans.empty()) {
+ LOG(1) << "Deleting " << nss.ns() << " range "
+ << redact(self->_orphans.front().range.toString()) << " next.";
+ }
+
+ return Date_t{};
}
+ invariant(range);
+ invariantOK(wrote.getStatus());
+ invariant(wrote.getValue() > 0);
+
notification.abandon();
return Date_t{};
}
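
A pattern worth noting in the new code is the immediately invoked lambda around waitForWriteConcern, which converts a thrown DBException into a Status handled on the normal path while keeping `status` const. A standalone sketch of that idiom follows; the Status struct and waitForWriteConcernOrThrow are stand-ins for illustration, not the real mongo::Status or server call.

    #include <iostream>
    #include <stdexcept>
    #include <string>

    // Stand-in for the server's Status type (assumption, not the real mongo::Status).
    struct Status {
        bool ok;
        std::string reason;
    };

    // Hypothetical operation that reports failure by throwing.
    static Status waitForWriteConcernOrThrow(bool fail) {
        if (fail) throw std::runtime_error("waiting for replication timed out");
        return Status{true, ""};
    }

    int main() {
        // Immediately invoked lambda: any exception becomes a Status on the normal path.
        const auto status = [&] {
            try {
                return waitForWriteConcernOrThrow(true);
            } catch (const std::exception& e) {
                return Status{false, e.what()};
            }
        }();

        if (!status.ok) {
            std::cout << "write concern wait failed: " << status.reason << "\n";
        }
        return 0;
    }
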