summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Allison Easton <allison.easton@mongodb.com> 2022-06-10 13:33:24 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-06-10 13:59:34 +0000
commit: 119a34c2e06680d35c089effa2c9c809e1bd1102 (patch)
tree: 0a637908d7a789a8f953c791c3af23ff5094a1bd
parent: 1937d9457f2f04ca865e983fdce5db6d014f1f73 (diff)
download: mongo-119a34c2e06680d35c089effa2c9c809e1bd1102.tar.gz
SERVER-63243 Range deleter must not clean up orphan ranges in a round-robin fashion
(cherry picked from commit f44581d5bfe275a3b9f0454dd7843c04ccfd1f2d)
-rw-r--r-- src/mongo/db/s/range_deletion_util.cpp 171
1 file changed, 91 insertions, 80 deletions
diff --git a/src/mongo/db/s/range_deletion_util.cpp b/src/mongo/db/s/range_deletion_util.cpp
index 700412d18b3..ee4562105d9 100644
--- a/src/mongo/db/s/range_deletion_util.cpp
+++ b/src/mongo/db/s/range_deletion_util.cpp
@@ -298,87 +298,98 @@ ExecutorFuture<void> deleteRangeInBatches(const std::shared_ptr<executor::TaskEx
const UUID& migrationId,
int numDocsToRemovePerBatch,
Milliseconds delayBetweenBatches) {
- return AsyncTry([=] {
- return withTemporaryOperationContext(
- [=](OperationContext* opCtx) {
- LOGV2_DEBUG(5346200,
- 1,
- "Starting batch deletion",
- "namespace"_attr = nss,
- "range"_attr = redact(range.toString()),
- "numDocsToRemovePerBatch"_attr = numDocsToRemovePerBatch,
- "delayBetweenBatches"_attr = delayBetweenBatches);
-
- ensureRangeDeletionTaskStillExists(opCtx, migrationId);
-
- int numDeleted;
-
- {
- AutoGetCollection collection(opCtx, nss, MODE_IX);
-
- // Ensure the collection exists and has not been dropped or dropped and
- // recreated
- uassert(ErrorCodes::
- RangeDeletionAbandonedBecauseCollectionWithUUIDDoesNotExist,
- "Collection has been dropped since enqueuing this range "
- "deletion task. No need to delete documents.",
- !collectionUuidHasChanged(
- nss, collection.getCollection(), collectionUuid));
-
- markRangeDeletionTaskAsProcessing(opCtx, migrationId);
-
- {
- ScopedRangeDeleterLock rangeDeleterLock(opCtx, collectionUuid);
-
- numDeleted =
- uassertStatusOK(deleteNextBatch(opCtx,
- collection.getCollection(),
- keyPattern,
- range,
- numDocsToRemovePerBatch));
-
- migrationutil::persistUpdatedNumOrphans(
- opCtx, migrationId, collectionUuid, -numDeleted);
- }
-
- if (MONGO_unlikely(hangAfterDoingDeletion.shouldFail())) {
- hangAfterDoingDeletion.pauseWhileSet(opCtx);
- }
- }
-
- LOGV2_DEBUG(23769,
- 1,
- "Deleted documents in pass",
- "numDeleted"_attr = numDeleted,
- "namespace"_attr = nss.ns(),
- "collectionUUID"_attr = collectionUuid,
- "range"_attr = range.toString());
-
- if (numDeleted > 0) {
- // (SERVER-62368) The range-deleter executor is mono-threaded, so
- // sleeping synchronously for `delayBetweenBatches` ensures that no other
- // batch is going to be cleared up before the expected delay.
- opCtx->sleepFor(delayBetweenBatches);
- }
-
- return numDeleted == 0;
- },
- nss);
- })
- .until([=](StatusWith<bool> swAllDocumentsInRangeDeleted) {
- // Continue iterating until there are no more documents to delete, retrying on any error
- // that doesn't indicate that this node is stepping down.
- return (swAllDocumentsInRangeDeleted.isOK() &&
- swAllDocumentsInRangeDeleted.getValue()) ||
- swAllDocumentsInRangeDeleted ==
- ErrorCodes::RangeDeletionAbandonedBecauseCollectionWithUUIDDoesNotExist ||
- swAllDocumentsInRangeDeleted ==
- ErrorCodes::RangeDeletionAbandonedBecauseTaskDocumentDoesNotExist ||
- swAllDocumentsInRangeDeleted == ErrorCodes::KeyPatternShorterThanBound ||
- ErrorCodes::isShutdownError(swAllDocumentsInRangeDeleted.getStatus()) ||
- ErrorCodes::isNotPrimaryError(swAllDocumentsInRangeDeleted.getStatus());
+ return ExecutorFuture<void>(executor)
+ .then([=] {
+ bool allDocsRemoved = false;
+ // Delete all batches in this range unless a stepdown error occurs. Do not yield the
+ // executor to ensure that this range is fully deleted before another range is
+ // processed.
+ while (!allDocsRemoved) {
+ try {
+ allDocsRemoved = withTemporaryOperationContext(
+ [=](OperationContext* opCtx) {
+ LOGV2_DEBUG(5346200,
+ 1,
+ "Starting batch deletion",
+ "namespace"_attr = nss,
+ "range"_attr = redact(range.toString()),
+ "numDocsToRemovePerBatch"_attr = numDocsToRemovePerBatch,
+ "delayBetweenBatches"_attr = delayBetweenBatches);
+
+ ensureRangeDeletionTaskStillExists(opCtx, migrationId);
+
+ int numDeleted;
+
+ {
+ AutoGetCollection collection(opCtx, nss, MODE_IX);
+
+ // Ensure the collection exists and has not been dropped or dropped
+ // and recreated
+ uassert(
+ ErrorCodes::
+ RangeDeletionAbandonedBecauseCollectionWithUUIDDoesNotExist,
+ "Collection has been dropped since enqueuing this range "
+ "deletion task. No need to delete documents.",
+ !collectionUuidHasChanged(
+ nss, collection.getCollection(), collectionUuid));
+
+ markRangeDeletionTaskAsProcessing(opCtx, migrationId);
+
+ {
+ ScopedRangeDeleterLock rangeDeleterLock(opCtx, collectionUuid);
+
+ numDeleted =
+ uassertStatusOK(deleteNextBatch(opCtx,
+ collection.getCollection(),
+ keyPattern,
+ range,
+ numDocsToRemovePerBatch));
+
+ migrationutil::persistUpdatedNumOrphans(
+ opCtx, migrationId, collectionUuid, -numDeleted);
+ }
+
+ if (MONGO_unlikely(hangAfterDoingDeletion.shouldFail())) {
+ hangAfterDoingDeletion.pauseWhileSet(opCtx);
+ }
+ }
+
+ LOGV2_DEBUG(23769,
+ 1,
+ "Deleted documents in pass",
+ "numDeleted"_attr = numDeleted,
+ "namespace"_attr = nss.ns(),
+ "collectionUUID"_attr = collectionUuid,
+ "range"_attr = range.toString());
+
+ if (numDeleted > 0) {
+ // (SERVER-62368) The range-deleter executor is mono-threaded, so
+ // sleeping synchronously for `delayBetweenBatches` ensures that no
+ // other batch is going to be cleared up before the expected delay.
+ opCtx->sleepFor(delayBetweenBatches);
+ }
+
+ return numDeleted < numDocsToRemovePerBatch;
+ },
+ nss);
+ } catch (const DBException& ex) {
+ // Errors other than those indicating stepdown and those that indicate that the
+ // range deletion can no longer occur should be retried.
+ auto errorCode = ex.code();
+ if (errorCode ==
+ ErrorCodes::
+ RangeDeletionAbandonedBecauseCollectionWithUUIDDoesNotExist ||
+ errorCode ==
+ ErrorCodes::RangeDeletionAbandonedBecauseTaskDocumentDoesNotExist ||
+ errorCode == ErrorCodes::KeyPatternShorterThanBound ||
+ ErrorCodes::isShutdownError(errorCode) ||
+ ErrorCodes::isNotPrimaryError(errorCode)) {
+ return ex.toStatus();
+ };
+ }
+ }
+ return Status::OK();
})
- .on(executor, CancellationToken::uncancelable())
.ignoreValue();
}