summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKaloian Manassiev <kaloian.manassiev@mongodb.com>2022-05-31 17:07:58 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-06-04 21:32:08 +0000
commitbd0d8c22a35afbbc145a850900df0ddf56da597f (patch)
tree9b1b9a52ebefd6a458a182ec4eabf381446201e6
parentfb08d4e18b6fc61924514a7785352c735c1cceeb (diff)
downloadmongo-bd0d8c22a35afbbc145a850900df0ddf56da597f.tar.gz
SERVER-66866 Make the range deleter not sleep under collection lock
(cherry picked from commit cec61a460e69c47e1bf0e455214f776c62053f53)
-rw-r--r--src/mongo/db/s/range_deletion_util.cpp89
-rw-r--r--src/mongo/db/s/range_deletion_util.h15
-rw-r--r--src/mongo/db/s/sharding_runtime_d_params.idl8
3 files changed, 49 insertions, 63 deletions
diff --git a/src/mongo/db/s/range_deletion_util.cpp b/src/mongo/db/s/range_deletion_util.cpp
index cf2106d0575..22d03b32ce4 100644
--- a/src/mongo/db/s/range_deletion_util.cpp
+++ b/src/mongo/db/s/range_deletion_util.cpp
@@ -34,9 +34,8 @@
#include "mongo/db/s/range_deletion_util.h"
#include <algorithm>
-#include <utility>
-
#include <boost/optional.hpp>
+#include <utility>
#include "mongo/db/catalog/index_catalog.h"
#include "mongo/db/catalog_raii.h"
@@ -56,6 +55,7 @@
#include "mongo/db/repl/repl_client_info.h"
#include "mongo/db/repl/wait_for_majority_service.h"
#include "mongo/db/s/migration_util.h"
+#include "mongo/db/s/sharding_runtime_d_params_gen.h"
#include "mongo/db/s/sharding_statistics.h"
#include "mongo/db/service_context.h"
#include "mongo/db/storage/remove_saver.h"
@@ -66,11 +66,7 @@
#include "mongo/util/future_util.h"
namespace mongo {
-
namespace {
-const WriteConcernOptions kMajorityWriteConcern(WriteConcernOptions::kMajority,
- WriteConcernOptions::SyncMode::UNSET,
- WriteConcernOptions::kWriteConcernTimeoutSharding);
MONGO_FAIL_POINT_DEFINE(hangBeforeDoingDeletion);
MONGO_FAIL_POINT_DEFINE(suspendRangeDeletion);
@@ -239,7 +235,6 @@ StatusWith<int> deleteNextBatch(OperationContext* opCtx,
return numDeleted;
}
-
template <typename Callable>
auto withTemporaryOperationContext(Callable&& callable, const NamespaceString& nss) {
ThreadClient tc(migrationutil::kRangeDeletionThreadName, getGlobalServiceContext());
@@ -299,8 +294,8 @@ void ensureRangeDeletionTaskStillExists(OperationContext* opCtx, const UUID& mig
}
/**
- * Delete the range in a sequence of batches until there are no more documents to
- * delete or deletion returns an error.
+ * Delete the range in a sequence of batches until there are no more documents to delete or deletion
+ * returns an error.
*/
ExecutorFuture<void> deleteRangeInBatches(const std::shared_ptr<executor::TaskExecutor>& executor,
const NamespaceString& nss,
@@ -325,32 +320,34 @@ ExecutorFuture<void> deleteRangeInBatches(const std::shared_ptr<executor::TaskEx
ensureRangeDeletionTaskStillExists(opCtx, *migrationId);
}
- AutoGetCollection collection(opCtx, nss, MODE_IX);
-
- // Ensure the collection exists and has not been dropped or dropped and
- // recreated.
- uassert(
- ErrorCodes::RangeDeletionAbandonedBecauseCollectionWithUUIDDoesNotExist,
- "Collection has been dropped since enqueuing this range "
- "deletion task. No need to delete documents.",
- !collectionUuidHasChanged(
- nss, collection.getCollection(), collectionUuid));
-
- auto numDeleted = uassertStatusOK(deleteNextBatch(opCtx,
- collection.getCollection(),
- keyPattern,
- range,
- numDocsToRemovePerBatch));
- LOGV2_DEBUG(
- 23769,
- 1,
- "Deleted {numDeleted} documents in pass in namespace {namespace} with "
- "UUID {collectionUUID} for range {range}",
- "Deleted documents in pass",
- "numDeleted"_attr = numDeleted,
- "namespace"_attr = nss.ns(),
- "collectionUUID"_attr = collectionUuid,
- "range"_attr = range.toString());
+ int numDeleted;
+
+ {
+ AutoGetCollection collection(opCtx, nss, MODE_IX);
+
+ // Ensure the collection exists and has not been dropped or dropped and
+ // recreated
+ uassert(ErrorCodes::
+ RangeDeletionAbandonedBecauseCollectionWithUUIDDoesNotExist,
+ "Collection has been dropped since enqueuing this range "
+ "deletion task. No need to delete documents.",
+ !collectionUuidHasChanged(
+ nss, collection.getCollection(), collectionUuid));
+
+ numDeleted = uassertStatusOK(deleteNextBatch(opCtx,
+ collection.getCollection(),
+ keyPattern,
+ range,
+ numDocsToRemovePerBatch));
+ }
+
+ LOGV2_DEBUG(23769,
+ 1,
+ "Deleted documents in pass",
+ "numDeleted"_attr = numDeleted,
+ "namespace"_attr = nss.ns(),
+ "collectionUUID"_attr = collectionUuid,
+ "range"_attr = range.toString());
if (numDeleted > 0) {
// (SERVER-62368) The range-deleter executor is mono-threaded, so
@@ -359,21 +356,22 @@ ExecutorFuture<void> deleteRangeInBatches(const std::shared_ptr<executor::TaskEx
opCtx->sleepFor(delayBetweenBatches);
}
- return numDeleted;
+ return numDeleted == 0;
},
nss);
})
- .until([=](StatusWith<int> swNumDeleted) {
- // Continue iterating until there are no more documents to delete, retrying on
- // any error that doesn't indicate that this node is stepping down.
- return (swNumDeleted.isOK() && swNumDeleted.getValue() < numDocsToRemovePerBatch) ||
- swNumDeleted.getStatus() ==
+ .until([=](StatusWith<bool> swAllDocumentsInRangeDeleted) {
+ // Continue iterating until there are no more documents to delete, retrying on any
+ // error that doesn't indicate that this node is stepping down.
+ return (swAllDocumentsInRangeDeleted.isOK() &&
+ swAllDocumentsInRangeDeleted.getValue()) ||
+ swAllDocumentsInRangeDeleted ==
ErrorCodes::RangeDeletionAbandonedBecauseCollectionWithUUIDDoesNotExist ||
- swNumDeleted.getStatus() ==
+ swAllDocumentsInRangeDeleted ==
ErrorCodes::RangeDeletionAbandonedBecauseTaskDocumentDoesNotExist ||
- swNumDeleted.getStatus().code() == ErrorCodes::KeyPatternShorterThanBound ||
- ErrorCodes::isShutdownError(swNumDeleted.getStatus()) ||
- ErrorCodes::isNotPrimaryError(swNumDeleted.getStatus());
+ swAllDocumentsInRangeDeleted == ErrorCodes::KeyPatternShorterThanBound ||
+ ErrorCodes::isShutdownError(swAllDocumentsInRangeDeleted.getStatus()) ||
+ ErrorCodes::isNotPrimaryError(swAllDocumentsInRangeDeleted.getStatus());
})
.on(executor, CancellationToken::uncancelable())
.ignoreValue();
@@ -505,7 +503,6 @@ void deleteRangeDeletionTasksForRename(OperationContext* opCtx,
QUERY(RangeDeletionTask::kNssFieldName << toNss.ns()));
}
-
SharedSemiFuture<void> removeDocumentsInRange(
const std::shared_ptr<executor::TaskExecutor>& executor,
SemiFuture<void> waitForActiveQueriesToComplete,
diff --git a/src/mongo/db/s/range_deletion_util.h b/src/mongo/db/s/range_deletion_util.h
index b2ab01ec168..510acbc8e74 100644
--- a/src/mongo/db/s/range_deletion_util.h
+++ b/src/mongo/db/s/range_deletion_util.h
@@ -28,9 +28,8 @@
*/
#pragma once
-#include <list>
-
#include <boost/optional.hpp>
+#include <list>
#include "mongo/db/namespace_string.h"
#include "mongo/db/s/range_deletion_task_gen.h"
@@ -39,18 +38,6 @@
namespace mongo {
-class BSONObj;
-
-// The maximum number of documents to delete in a single batch during range deletion.
-// secondaryThrottle and rangeDeleterBatchDelayMS apply between each batch.
-// Must be positive or 0 (the default), which means to use the value of
-// internalQueryExecYieldIterations (or 1 if that's negative or zero).
-extern AtomicWord<int> rangeDeleterBatchSize;
-
-// After completing a batch of document deletions, the time in millis to wait before commencing the
-// next batch of deletions.
-extern AtomicWord<int> rangeDeleterBatchDelayMS;
-
/**
* Deletes a range of orphaned documents for the given namespace and collection UUID. Returns a
* future which will be resolved when the range has finished being deleted. The resulting future
diff --git a/src/mongo/db/s/sharding_runtime_d_params.idl b/src/mongo/db/s/sharding_runtime_d_params.idl
index 70f9466365c..793a5b8cab2 100644
--- a/src/mongo/db/s/sharding_runtime_d_params.idl
+++ b/src/mongo/db/s/sharding_runtime_d_params.idl
@@ -32,8 +32,10 @@ server_parameters:
rangeDeleterBatchSize:
description: >-
The maximum number of documents in each batch to delete during the cleanup stage of chunk
- migration (or the cleanupOrphaned command). A value of 0 indicates that the system chooses
- the default value (INT_MAX).
+ migration (or the cleanupOrphaned command). Between each batch, secondaryThrottle and
+ rangeDeleterBatchDelayMS will apply.
+
+ A value of 0 indicates that the system chooses the default value (INT_MAX).
set_at: [startup, runtime]
cpp_vartype: AtomicWord<int>
cpp_varname: rangeDeleterBatchSize
@@ -110,7 +112,7 @@ server_parameters:
default: 10
orphanCleanupDelaySecs:
- description: 'How long to wait before starting cleanup of an emigrated chunk range.'
+ description: How long to wait before starting cleanup of an emigrated chunk range.
set_at: [startup, runtime]
cpp_vartype: AtomicWord<int>
cpp_varname: orphanCleanupDelaySecs