diff options
author | Allison Easton <allison.easton@mongodb.com> | 2022-06-10 13:33:24 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-06-10 13:59:34 +0000 |
commit | 119a34c2e06680d35c089effa2c9c809e1bd1102 (patch) | |
tree | 0a637908d7a789a8f953c791c3af23ff5094a1bd | |
parent | 1937d9457f2f04ca865e983fdce5db6d014f1f73 (diff) | |
download | mongo-119a34c2e06680d35c089effa2c9c809e1bd1102.tar.gz |
SERVER-63243 Range deleter must not clean up orphan ranges in a round-robin fashion
(cherry picked from commit f44581d5bfe275a3b9f0454dd7843c04ccfd1f2d)
-rw-r--r-- | src/mongo/db/s/range_deletion_util.cpp | 171 |
1 files changed, 91 insertions, 80 deletions
diff --git a/src/mongo/db/s/range_deletion_util.cpp b/src/mongo/db/s/range_deletion_util.cpp index 700412d18b3..ee4562105d9 100644 --- a/src/mongo/db/s/range_deletion_util.cpp +++ b/src/mongo/db/s/range_deletion_util.cpp @@ -298,87 +298,98 @@ ExecutorFuture<void> deleteRangeInBatches(const std::shared_ptr<executor::TaskEx const UUID& migrationId, int numDocsToRemovePerBatch, Milliseconds delayBetweenBatches) { - return AsyncTry([=] { - return withTemporaryOperationContext( - [=](OperationContext* opCtx) { - LOGV2_DEBUG(5346200, - 1, - "Starting batch deletion", - "namespace"_attr = nss, - "range"_attr = redact(range.toString()), - "numDocsToRemovePerBatch"_attr = numDocsToRemovePerBatch, - "delayBetweenBatches"_attr = delayBetweenBatches); - - ensureRangeDeletionTaskStillExists(opCtx, migrationId); - - int numDeleted; - - { - AutoGetCollection collection(opCtx, nss, MODE_IX); - - // Ensure the collection exists and has not been dropped or dropped and - // recreated - uassert(ErrorCodes:: - RangeDeletionAbandonedBecauseCollectionWithUUIDDoesNotExist, - "Collection has been dropped since enqueuing this range " - "deletion task. 
No need to delete documents.", - !collectionUuidHasChanged( - nss, collection.getCollection(), collectionUuid)); - - markRangeDeletionTaskAsProcessing(opCtx, migrationId); - - { - ScopedRangeDeleterLock rangeDeleterLock(opCtx, collectionUuid); - - numDeleted = - uassertStatusOK(deleteNextBatch(opCtx, - collection.getCollection(), - keyPattern, - range, - numDocsToRemovePerBatch)); - - migrationutil::persistUpdatedNumOrphans( - opCtx, migrationId, collectionUuid, -numDeleted); - } - - if (MONGO_unlikely(hangAfterDoingDeletion.shouldFail())) { - hangAfterDoingDeletion.pauseWhileSet(opCtx); - } - } - - LOGV2_DEBUG(23769, - 1, - "Deleted documents in pass", - "numDeleted"_attr = numDeleted, - "namespace"_attr = nss.ns(), - "collectionUUID"_attr = collectionUuid, - "range"_attr = range.toString()); - - if (numDeleted > 0) { - // (SERVER-62368) The range-deleter executor is mono-threaded, so - // sleeping synchronously for `delayBetweenBatches` ensures that no other - // batch is going to be cleared up before the expected delay. - opCtx->sleepFor(delayBetweenBatches); - } - - return numDeleted == 0; - }, - nss); - }) - .until([=](StatusWith<bool> swAllDocumentsInRangeDeleted) { - // Continue iterating until there are no more documents to delete, retrying on any error - // that doesn't indicate that this node is stepping down. 
- return (swAllDocumentsInRangeDeleted.isOK() && - swAllDocumentsInRangeDeleted.getValue()) || - swAllDocumentsInRangeDeleted == - ErrorCodes::RangeDeletionAbandonedBecauseCollectionWithUUIDDoesNotExist || - swAllDocumentsInRangeDeleted == - ErrorCodes::RangeDeletionAbandonedBecauseTaskDocumentDoesNotExist || - swAllDocumentsInRangeDeleted == ErrorCodes::KeyPatternShorterThanBound || - ErrorCodes::isShutdownError(swAllDocumentsInRangeDeleted.getStatus()) || - ErrorCodes::isNotPrimaryError(swAllDocumentsInRangeDeleted.getStatus()); + return ExecutorFuture<void>(executor) + .then([=] { + bool allDocsRemoved = false; + // Delete all batches in this range unless a stepdown error occurs. Do not yield the + // executor to ensure that this range is fully deleted before another range is + // processed. + while (!allDocsRemoved) { + try { + allDocsRemoved = withTemporaryOperationContext( + [=](OperationContext* opCtx) { + LOGV2_DEBUG(5346200, + 1, + "Starting batch deletion", + "namespace"_attr = nss, + "range"_attr = redact(range.toString()), + "numDocsToRemovePerBatch"_attr = numDocsToRemovePerBatch, + "delayBetweenBatches"_attr = delayBetweenBatches); + + ensureRangeDeletionTaskStillExists(opCtx, migrationId); + + int numDeleted; + + { + AutoGetCollection collection(opCtx, nss, MODE_IX); + + // Ensure the collection exists and has not been dropped or dropped + // and recreated + uassert( + ErrorCodes:: + RangeDeletionAbandonedBecauseCollectionWithUUIDDoesNotExist, + "Collection has been dropped since enqueuing this range " + "deletion task. 
No need to delete documents.", + !collectionUuidHasChanged( + nss, collection.getCollection(), collectionUuid)); + + markRangeDeletionTaskAsProcessing(opCtx, migrationId); + + { + ScopedRangeDeleterLock rangeDeleterLock(opCtx, collectionUuid); + + numDeleted = + uassertStatusOK(deleteNextBatch(opCtx, + collection.getCollection(), + keyPattern, + range, + numDocsToRemovePerBatch)); + + migrationutil::persistUpdatedNumOrphans( + opCtx, migrationId, collectionUuid, -numDeleted); + } + + if (MONGO_unlikely(hangAfterDoingDeletion.shouldFail())) { + hangAfterDoingDeletion.pauseWhileSet(opCtx); + } + } + + LOGV2_DEBUG(23769, + 1, + "Deleted documents in pass", + "numDeleted"_attr = numDeleted, + "namespace"_attr = nss.ns(), + "collectionUUID"_attr = collectionUuid, + "range"_attr = range.toString()); + + if (numDeleted > 0) { + // (SERVER-62368) The range-deleter executor is mono-threaded, so + // sleeping synchronously for `delayBetweenBatches` ensures that no + // other batch is going to be cleared up before the expected delay. + opCtx->sleepFor(delayBetweenBatches); + } + + return numDeleted < numDocsToRemovePerBatch; + }, + nss); + } catch (const DBException& ex) { + // Errors other than those indicating stepdown and those that indicate that the + // range deletion can no longer occur should be retried. + auto errorCode = ex.code(); + if (errorCode == + ErrorCodes:: + RangeDeletionAbandonedBecauseCollectionWithUUIDDoesNotExist || + errorCode == + ErrorCodes::RangeDeletionAbandonedBecauseTaskDocumentDoesNotExist || + errorCode == ErrorCodes::KeyPatternShorterThanBound || + ErrorCodes::isShutdownError(errorCode) || + ErrorCodes::isNotPrimaryError(errorCode)) { + return ex.toStatus(); + }; + } + } + return Status::OK(); }) - .on(executor, CancellationToken::uncancelable()) .ignoreValue(); } |