author     Esha Maharishi <esha.maharishi@mongodb.com>  2020-05-12 17:26:45 -0400
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>  2020-05-18 04:14:45 +0000
commit     d0e6f87351312a299a17f6d63e3f2f4db834ae1f (patch)
tree       cf758ddf2c85d414d9eab4aa1b318791934aae30 /src/mongo/db/s/cleanup_orphaned_cmd.cpp
parent     7e2111ef33fc40959a254bd3109466176ae60718 (diff)
download   mongo-d0e6f87351312a299a17f6d63e3f2f4db834ae1f.tar.gz
SERVER-47992 Make disableResumableRangeDeleter just prevent ranges from being submitted for deletion
Diffstat (limited to 'src/mongo/db/s/cleanup_orphaned_cmd.cpp')
-rw-r--r--  src/mongo/db/s/cleanup_orphaned_cmd.cpp | 276
1 file changed, 70 insertions, 206 deletions
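
Before the diff itself, a minimal standalone sketch (plain C++, no MongoDB headers) of the control flow this patch leaves in cleanupOrphanedData(): poll the number of outstanding range deletion tasks, fail fast if the resumable range deleter is disabled, otherwise wait for the in-memory tasks to drain, sleep briefly, and re-check. Every name below (checkForConflictingDeletions, waitForClean, rangeDeleterDisabled, remainingDeletionTasks) is a placeholder standing in for the real server functions and parameters that appear in the diff; this illustrates the shape of the loop, not the server implementation.

#include <chrono>
#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <thread>

namespace {

// Stand-ins for server state: the disableResumableRangeDeleter parameter and
// the number of outstanding range deletion tasks for the collection.
bool rangeDeleterDisabled = false;
std::int64_t remainingDeletionTasks = 3;

// Placeholder for migrationutil::checkForConflictingDeletions().
std::int64_t checkForConflictingDeletions() {
    return remainingDeletionTasks;
}

// Placeholder for CollectionShardingRuntime::waitForClean(); here it simply
// pretends that one in-memory task drains per call.
bool waitForClean() {
    if (remainingDeletionTasks > 0) {
        --remainingDeletionTasks;
    }
    return true;
}

// Shape of the new cleanupOrphanedData() loop.
void waitForOrphanCleanup() {
    while (auto numRemaining = checkForConflictingDeletions()) {
        if (rangeDeleterDisabled) {
            // Mirrors the new uassert(ErrorCodes::ResumableRangeDeleterDisabled, ...):
            // with the deleter disabled the tasks can never drain, so fail instead of hanging.
            throw std::runtime_error("resumable range deleter is disabled");
        }
        std::cout << "waiting for " << numRemaining << " range deletion task(s)\n";
        if (!waitForClean()) {
            throw std::runtime_error("waitForClean failed");
        }
        // A freshly stepped-up node may not have resubmitted the tasks persisted in
        // config.rangeDeletions yet, so sleep briefly and re-check rather than trusting
        // a single waitForClean() pass.
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
    }
}

}  // namespace

int main() {
    waitForOrphanCleanup();
    std::cout << "no orphaned ranges remain\n";
    return 0;
}
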
diff --git a/src/mongo/db/s/cleanup_orphaned_cmd.cpp b/src/mongo/db/s/cleanup_orphaned_cmd.cpp
index 3725ab951d6..97b78e600ba 100644
--- a/src/mongo/db/s/cleanup_orphaned_cmd.cpp
+++ b/src/mongo/db/s/cleanup_orphaned_cmd.cpp
@@ -59,14 +59,8 @@ namespace {
enum class CleanupResult { kDone, kContinue, kError };
/**
- * If the resumable range deleter is disabled:
- * Cleans up one range of orphaned data starting from a range that overlaps or starts at
- * 'startingFromKey'. If empty, startingFromKey is the minimum key of the sharded range.
- *
- * If the resumable range deleter is enabled:
* Waits for all possibly orphaned ranges on 'nss' to be cleaned up.
*
- * @return CleanupResult::kContinue and 'stoppedAtKey' if orphaned range was found and cleaned
* @return CleanupResult::kDone if no orphaned ranges remain
* @return CleanupResult::kError and 'errMsg' if an error occurred
*
@@ -75,205 +69,86 @@ enum class CleanupResult { kDone, kContinue, kError };
CleanupResult cleanupOrphanedData(OperationContext* opCtx,
const NamespaceString& ns,
const BSONObj& startingFromKeyConst,
- BSONObj* stoppedAtKey,
std::string* errMsg) {
- // Note that 'disableResumableRangeDeleter' is a startup-only parameter, so it cannot change
- // while this process is running.
- if (!disableResumableRangeDeleter.load()) {
- boost::optional<ChunkRange> range;
- boost::optional<UUID> collectionUuid;
- {
- AutoGetCollection autoColl(opCtx, ns, MODE_IX);
- if (!autoColl.getCollection()) {
- LOGV2(4416000,
- "cleanupOrphaned skipping waiting for orphaned data cleanup because "
- "{namespace} does not exist",
- "cleanupOrphaned skipping waiting for orphaned data cleanup because "
- "collection does not exist",
- "namespace"_attr = ns.ns());
- return CleanupResult::kDone;
- }
- collectionUuid.emplace(autoColl.getCollection()->uuid());
-
- auto* const css = CollectionShardingRuntime::get(opCtx, ns);
- const auto collDesc = css->getCollectionDescription();
- if (!collDesc.isSharded()) {
- LOGV2(4416001,
- "cleanupOrphaned skipping waiting for orphaned data cleanup because "
- "{namespace} is not sharded",
- "cleanupOrphaned skipping waiting for orphaned data cleanup because "
- "collection is not sharded",
- "namespace"_attr = ns.ns());
- return CleanupResult::kDone;
- }
- range.emplace(collDesc.getMinKey(), collDesc.getMaxKey());
-
-            // Though the 'startingFromKey' parameter is not used as the min key of the range to
-            // wait for, we still validate 'startingFromKey' in the same way as the original
-            // cleanupOrphaned logic did if 'startingFromKey' is present.
- BSONObj keyPattern = collDesc.getKeyPattern();
- if (!startingFromKeyConst.isEmpty() && !collDesc.isValidKey(startingFromKeyConst)) {
- LOGV2_ERROR_OPTIONS(
- 4416002,
- {logv2::UserAssertAfterLog(ErrorCodes::OrphanedRangeCleanUpFailed)},
- "Could not cleanup orphaned data because start key does not match shard key "
- "pattern",
- "startKey"_attr = redact(startingFromKeyConst),
- "shardKeyPattern"_attr = keyPattern);
- }
+ boost::optional<ChunkRange> range;
+ boost::optional<UUID> collectionUuid;
+ {
+ AutoGetCollection autoColl(opCtx, ns, MODE_IX);
+ if (!autoColl.getCollection()) {
+ LOGV2(4416000,
+ "cleanupOrphaned skipping waiting for orphaned data cleanup because "
+ "{namespace} does not exist",
+ "cleanupOrphaned skipping waiting for orphaned data cleanup because "
+ "collection does not exist",
+ "namespace"_attr = ns.ns());
+ return CleanupResult::kDone;
}
-
- // We actually want to wait until there are no range deletion tasks for this namespace/UUID,
- // but we don't have a good way to wait for that event, so instead we wait for there to be
- // no tasks being processed in memory for this namespace/UUID.
- // However, it's possible this node has recently stepped up, and the stepup recovery task to
- // resubmit range deletion tasks for processing has not yet completed. In that case,
- // waitForClean will return though there are still tasks in config.rangeDeletions, so we
- // sleep for a short time and then try waitForClean again.
- while (auto numRemainingDeletionTasks =
- migrationutil::checkForConflictingDeletions(opCtx, *range, *collectionUuid)) {
- LOGV2(4416003,
- "cleanupOrphaned going to wait for range deletion tasks to complete",
- "namespace"_attr = ns.ns(),
- "collectionUUID"_attr = *collectionUuid,
- "numRemainingDeletionTasks"_attr = numRemainingDeletionTasks);
-
- auto status =
- CollectionShardingRuntime::waitForClean(opCtx, ns, *collectionUuid, *range);
-
- if (!status.isOK()) {
- *errMsg = status.reason();
- return CleanupResult::kError;
- }
-
- opCtx->sleepFor(Milliseconds(1000));
+ collectionUuid.emplace(autoColl.getCollection()->uuid());
+
+ auto* const css = CollectionShardingRuntime::get(opCtx, ns);
+ const auto collDesc = css->getCollectionDescription();
+ if (!collDesc.isSharded()) {
+ LOGV2(4416001,
+ "cleanupOrphaned skipping waiting for orphaned data cleanup because "
+ "{namespace} is not sharded",
+ "cleanupOrphaned skipping waiting for orphaned data cleanup because "
+ "collection is not sharded",
+ "namespace"_attr = ns.ns());
+ return CleanupResult::kDone;
}
-
- return CleanupResult::kDone;
- } else {
-
- BSONObj startingFromKey = startingFromKeyConst;
- boost::optional<ChunkRange> targetRange;
- SharedSemiFuture<void> cleanupCompleteFuture;
-
- {
- AutoGetCollection autoColl(opCtx, ns, MODE_IX);
- auto* const css = CollectionShardingRuntime::get(opCtx, ns);
- // Keep the collection metadata from changing for the rest of this scope.
- auto csrLock = CollectionShardingRuntime::CSRLock::lockShared(opCtx, css);
- const auto collDesc = css->getCollectionDescription();
- if (!collDesc.isSharded()) {
- LOGV2(21911,
- "cleanupOrphaned skipping orphaned data cleanup because collection is not "
- "sharded",
- "namespace"_attr = ns.ns());
- return CleanupResult::kDone;
- }
-
- BSONObj keyPattern = collDesc.getKeyPattern();
- if (!startingFromKey.isEmpty()) {
- if (!collDesc.isValidKey(startingFromKey)) {
- LOGV2_ERROR_OPTIONS(
- 21912,
- {logv2::UserAssertAfterLog(ErrorCodes::OrphanedRangeCleanUpFailed)},
- "Could not cleanup orphaned data, start key {startKey} does not match "
- "shard key pattern {shardKeyPattern}",
- "Could not cleanup orphaned data because start key does not match shard "
- "key pattern",
- "startKey"_attr = redact(startingFromKey),
- "shardKeyPattern"_attr = keyPattern);
- }
- } else {
- startingFromKey = collDesc.getMinKey();
- }
-
- targetRange = css->getNextOrphanRange(startingFromKey);
- if (!targetRange) {
- LOGV2_DEBUG(21913,
- 1,
- "cleanupOrphaned returning because no orphan ranges remain",
- "namespace"_attr = ns.toString(),
- "startingFromKey"_attr = redact(startingFromKey));
-
- return CleanupResult::kDone;
- }
-
- *stoppedAtKey = targetRange->getMax();
-
- cleanupCompleteFuture =
- css->cleanUpRange(*targetRange, boost::none, CollectionShardingRuntime::kNow);
+ range.emplace(collDesc.getMinKey(), collDesc.getMaxKey());
+
+    // Though the 'startingFromKey' parameter is not used as the min key of the range to
+    // wait for, we still validate 'startingFromKey' in the same way as the original
+    // cleanupOrphaned logic did if 'startingFromKey' is present.
+ BSONObj keyPattern = collDesc.getKeyPattern();
+ if (!startingFromKeyConst.isEmpty() && !collDesc.isValidKey(startingFromKeyConst)) {
+ LOGV2_ERROR_OPTIONS(
+ 4416002,
+ {logv2::UserAssertAfterLog(ErrorCodes::OrphanedRangeCleanUpFailed)},
+ "Could not cleanup orphaned data because start key does not match shard key "
+ "pattern",
+ "startKey"_attr = redact(startingFromKeyConst),
+ "shardKeyPattern"_attr = keyPattern);
}
+ }
- // Sleep waiting for our own deletion. We don't actually care about any others, so there is
- // no need to call css::waitForClean() here.
-
- LOGV2_DEBUG(21914,
- 1,
- "cleanupOrphaned requested for {namespace} starting from {startingFromKey}, "
- "removing next orphan range {targetRange}; waiting...",
- "cleanupOrphaned requested",
- "namespace"_attr = ns.toString(),
- "startingFromKey"_attr = redact(startingFromKey),
- "targetRange"_attr = redact(targetRange->toString()));
-
- Status result = cleanupCompleteFuture.getNoThrow(opCtx);
-
- LOGV2_DEBUG(21915,
- 1,
- "Finished waiting for last {namespace} orphan range cleanup",
- "Finished waiting for last orphan range cleanup in collection",
- "namespace"_attr = ns.toString());
-
- if (!result.isOK()) {
- LOGV2_ERROR_OPTIONS(21916,
- {logv2::UserAssertAfterLog(result.code())},
- "Error waiting for last {namespace} orphan range cleanup: {error}",
- "Error waiting for last orphan range cleanup in collection",
- "namespace"_attr = ns.ns(),
- "error"_attr = redact(result.reason()));
+ // We actually want to wait until there are no range deletion tasks for this namespace/UUID,
+ // but we don't have a good way to wait for that event, so instead we wait for there to be
+ // no tasks being processed in memory for this namespace/UUID.
+ // However, it's possible this node has recently stepped up, and the stepup recovery task to
+ // resubmit range deletion tasks for processing has not yet completed. In that case,
+ // waitForClean will return though there are still tasks in config.rangeDeletions, so we
+ // sleep for a short time and then try waitForClean again.
+ while (auto numRemainingDeletionTasks =
+ migrationutil::checkForConflictingDeletions(opCtx, *range, *collectionUuid)) {
+ uassert(ErrorCodes::ResumableRangeDeleterDisabled,
+ "Failing cleanupOrphaned because the disableResumableRangeDeleter server parameter "
+ "is set to true and this shard contains range deletion tasks for the collection.",
+ !disableResumableRangeDeleter.load());
+
+ LOGV2(4416003,
+ "cleanupOrphaned going to wait for range deletion tasks to complete",
+ "namespace"_attr = ns.ns(),
+ "collectionUUID"_attr = *collectionUuid,
+ "numRemainingDeletionTasks"_attr = numRemainingDeletionTasks);
+
+ auto status = CollectionShardingRuntime::waitForClean(opCtx, ns, *collectionUuid, *range);
+
+ if (!status.isOK()) {
+ *errMsg = status.reason();
+ return CleanupResult::kError;
}
- return CleanupResult::kContinue;
+ opCtx->sleepFor(Milliseconds(1000));
}
+
+ return CleanupResult::kDone;
}
/**
- * If 'disableResumableRangeDeleter=true':
- *
- * Called on a particular namespace, and if the collection is sharded will clean up a single
- * orphaned data range which overlaps or starts after a passed-in 'startingFromKey'. Returns true
- * and a 'stoppedAtKey' (which will start a search for the next orphaned range if the command is
- * called again) or no key if there are no more orphaned ranges in the collection.
- *
- * If the collection is not sharded, returns true but no 'stoppedAtKey'.
- * On failure, returns false and an error message.
- *
- * Calling this command repeatedly until no 'stoppedAtKey' is returned ensures that the
- * full collection range is searched for orphaned documents, but since sharding state may
- * change between calls there is no guarantee that all orphaned documents were found unless
- * the balancer is off.
- *
- * Safe to call with the balancer on.
- *
- * Format:
- *
- * {
- * cleanupOrphaned: <ns>,
- * // optional parameters:
- * startingAtKey: { <shardKeyValue> }, // defaults to lowest value
- * secondaryThrottle: <bool>, // defaults to true
- * // defaults to { w: "majority", wtimeout: 60000 }. Applies to individual writes.
- * writeConcern: { <writeConcern options> }
- * }
- *
- * If 'disableResumableRangeDeleter=false':
- *
* Called on a particular namespace, and if the collection is sharded will wait for the number of
- * range deletion tasks on the collection on this shard to reach zero. Returns true on completion,
- * but never returns 'stoppedAtKey', since it always returns once there are no more orphaned ranges.
- *
- * If the collection is not sharded, returns true and no 'stoppedAtKey'.
- * On failure, returns false and an error message.
+ * range deletion tasks on the collection on this shard to reach zero.
*
* Since the sharding state may change after this call returns, there is no guarantee that orphans
* won't re-appear as a result of migrations that commit after this call returns.
@@ -310,9 +185,6 @@ public:
static BSONField<std::string> nsField;
static BSONField<BSONObj> startingFromKeyField;
- // Output
- static BSONField<BSONObj> stoppedAtKeyField;
-
bool errmsgRun(OperationContext* opCtx,
std::string const& db,
const BSONObj& cmdObj,
@@ -343,19 +215,12 @@ public:
forceShardFilteringMetadataRefresh(opCtx, nss, true /* forceRefreshFromThisThread */);
- BSONObj stoppedAtKey;
- CleanupResult cleanupResult =
- cleanupOrphanedData(opCtx, nss, startingFromKey, &stoppedAtKey, &errmsg);
+ CleanupResult cleanupResult = cleanupOrphanedData(opCtx, nss, startingFromKey, &errmsg);
if (cleanupResult == CleanupResult::kError) {
return false;
}
-
- if (cleanupResult == CleanupResult::kContinue) {
- result.append(stoppedAtKeyField(), stoppedAtKey);
- } else {
- dassert(cleanupResult == CleanupResult::kDone);
- }
+ dassert(cleanupResult == CleanupResult::kDone);
return true;
}
@@ -364,7 +229,6 @@ public:
BSONField<std::string> CleanupOrphanedCommand::nsField("cleanupOrphaned");
BSONField<BSONObj> CleanupOrphanedCommand::startingFromKeyField("startingFromKey");
-BSONField<BSONObj> CleanupOrphanedCommand::stoppedAtKeyField("stoppedAtKey");
} // namespace
} // namespace mongo
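
A hypothetical caller-side sketch (plain C++, placeholder types only, not a driver API) of why the 'stoppedAtKey' output field could be removed: the deleted doc comment describes callers repeating the command with each returned 'stoppedAtKey' until none comes back, whereas the new command waits for all range deletion tasks itself, so a single call is enough. CleanupReply and runCleanupOrphaned below are invented purely for illustration.

#include <iostream>
#include <optional>
#include <string>

// Hypothetical reply shape; after this change the server never fills 'stoppedAtKey'.
struct CleanupReply {
    bool ok = true;
    std::optional<std::string> stoppedAtKey;
};

// Hypothetical stand-in for issuing the cleanupOrphaned command against a shard.
CleanupReply runCleanupOrphaned(const std::string& ns,
                                const std::optional<std::string>& startingFromKey) {
    std::cout << "cleanupOrphaned on " << ns
              << (startingFromKey ? " starting from " + *startingFromKey : std::string())
              << '\n';
    return CleanupReply{};
}

int main() {
    const std::string ns = "test.user";

    // Old pattern: repeat the command, feeding each returned 'stoppedAtKey' back in
    // as 'startingFromKey', until no key comes back.
    std::optional<std::string> next;
    do {
        next = runCleanupOrphaned(ns, next).stoppedAtKey;
    } while (next);

    // New pattern: one call is enough, because the command itself waits until no
    // range deletion tasks remain for the collection on this shard.
    runCleanupOrphaned(ns, std::nullopt);
    return 0;
}
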