author    Allison Easton <allison.easton@mongodb.com>    2022-04-12 07:34:03 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com>    2022-04-12 18:52:14 +0000
commit    cb0e7061575b9879326598cb4bd75fd3ee3588cb (patch)
tree      f13b9482732cc7600099e0be049cd888dcd57fb9
parent    9f06d30d0701e6b224598e19b5d63a257f4ed554 (diff)
SERVER-64979 Range deleter must start with same range deletion task on step up
-rw-r--r--    jstests/sharding/range_deletion_ordering_with_stepdown.js    106
-rw-r--r--    src/mongo/db/s/migration_util.cpp    32
-rw-r--r--    src/mongo/db/s/range_deletion_task.idl    3
-rw-r--r--    src/mongo/db/s/range_deletion_util.cpp    10
-rw-r--r--    src/mongo/db/s/range_deletion_util.h    5
5 files changed, 149 insertions, 7 deletions
diff --git a/jstests/sharding/range_deletion_ordering_with_stepdown.js b/jstests/sharding/range_deletion_ordering_with_stepdown.js
new file mode 100644
index 00000000000..0f684899b73
--- /dev/null
+++ b/jstests/sharding/range_deletion_ordering_with_stepdown.js
@@ -0,0 +1,106 @@
+/**
+ * Test that an ongoing range deletion is the first range deletion executed upon step up.
+ *
+ * @tags: [
+ * requires_fcv_60,
+ * ]
+ */
+
+(function() {
+'use strict';
+
+load("jstests/libs/fail_point_util.js");
+
+const rangeDeleterBatchSize = 128;
+
+const st = new ShardingTest(
+ {shards: 2, rs: {nodes: 2, setParameter: {rangeDeleterBatchSize: rangeDeleterBatchSize}}});
+
+// Setup database
+const dbName = 'db';
+const db = st.getDB(dbName);
+assert.commandWorked(
+ st.s.adminCommand({enableSharding: dbName, primaryShard: st.shard0.shardName}));
+
+// Setup collection for test with orphans
+const coll = db['test'];
+const nss = coll.getFullName();
+assert.commandWorked(st.s.adminCommand({shardCollection: nss, key: {_id: 1}}));
+
+// Create two chunks
+assert.commandWorked(st.s.adminCommand({split: nss, middle: {_id: 0}}));
+
+// Insert numDocs docs into each of the two chunks.
+const numDocs = 1000;
+let bulk = coll.initializeUnorderedBulkOp();
+for (let i = 1; i <= numDocs; i++) {
+ bulk.insert({_id: i});
+ bulk.insert({_id: -i});
+}
+assert.commandWorked(bulk.execute());
+
+// Pause before first range deletion task
+let beforeDeletionStarts = configureFailPoint(st.shard0, "suspendRangeDeletion");
+assert.commandWorked(db.adminCommand({moveChunk: nss, find: {_id: 1}, to: st.shard1.shardName}));
+assert.commandWorked(db.adminCommand({moveChunk: nss, find: {_id: -1}, to: st.shard1.shardName}));
+
+// Allow first batch from one of the ranges to be deleted
+let beforeDeletionFailpoint = configureFailPoint(st.shard0, "hangBeforeDoingDeletion");
+beforeDeletionStarts.off();
+beforeDeletionFailpoint.wait();
+let afterDeletionFailpoint = configureFailPoint(st.shard0, "hangAfterDoingDeletion");
+beforeDeletionFailpoint.off();
+afterDeletionFailpoint.wait();
+
+// Figure out which range had a batch deleted from it
+let rangeDeletionDocs = st.shard0.getDB("config").getCollection("rangeDeletions").find().toArray();
+assert.eq(rangeDeletionDocs.length, 2);
+let processingDoc, otherDoc;
+if (rangeDeletionDocs[0].numOrphanDocs.valueOf() === numDocs) {
+ assert.eq(rangeDeletionDocs[1].numOrphanDocs, numDocs - rangeDeleterBatchSize);
+ processingDoc = rangeDeletionDocs[1];
+ otherDoc = rangeDeletionDocs[0];
+} else {
+ assert.eq(rangeDeletionDocs[0].numOrphanDocs, numDocs - rangeDeleterBatchSize);
+ assert.eq(rangeDeletionDocs[1].numOrphanDocs, numDocs);
+ processingDoc = rangeDeletionDocs[0];
+ otherDoc = rangeDeletionDocs[1];
+}
+
+// Reorder the tasks on disk to make it more likely they would be submitted out of order
+assert.commandWorked(st.shard0.getDB("config").getCollection("rangeDeletions").deleteMany({}));
+assert.commandWorked(
+ st.shard0.getDB("config").getCollection("rangeDeletions").insert(rangeDeletionDocs[1]));
+assert.commandWorked(
+ st.shard0.getDB("config").getCollection("rangeDeletions").insert(rangeDeletionDocs[0]));
+
+// Step down
+assert.commandWorked(st.rs0.getPrimary().adminCommand({replSetStepDown: 300, force: true}));
+st.rs0.awaitNodesAgreeOnPrimary();
+
+// Allow another batch deletion
+beforeDeletionFailpoint = configureFailPoint(st.shard0, "hangBeforeDoingDeletion");
+afterDeletionFailpoint.off();
+beforeDeletionFailpoint.wait();
+afterDeletionFailpoint = configureFailPoint(st.shard0, "hangAfterDoingDeletion");
+beforeDeletionFailpoint.off();
+afterDeletionFailpoint.wait();
+
+// Make sure the batch deleted was from the same range deletion
+rangeDeletionDocs = st.shard0.getDB("config").getCollection("rangeDeletions").find().toArray();
+assert.eq(rangeDeletionDocs.length, 2);
+rangeDeletionDocs.forEach((doc) => {
+ if (bsonWoCompare(processingDoc.range, doc.range) === 0) {
+ jsTest.log("Same id: " + tojson(doc));
+ assert.eq(doc.numOrphanDocs, numDocs - 2 * rangeDeleterBatchSize);
+ } else {
+ jsTest.log("Diff id: " + tojson(doc));
+ assert.eq(doc.numOrphanDocs, numDocs);
+ }
+});
+
+// Allow everything to finish
+afterDeletionFailpoint.off();
+
+st.stop();
+})();
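The new test can be run on its own with resmoke; the invocation below is a typical one, and the suite name is an assumption rather than something stated in this commit:

    python buildscripts/resmoke.py run --suites=sharding jstests/sharding/range_deletion_ordering_with_stepdown.js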
diff --git a/src/mongo/db/s/migration_util.cpp b/src/mongo/db/s/migration_util.cpp
index 2ab5852c6ec..bf55f7aecf1 100644
--- a/src/mongo/db/s/migration_util.cpp
+++ b/src/mongo/db/s/migration_util.cpp
@@ -510,16 +510,34 @@ void resubmitRangeDeletionsOnStepUp(ServiceContext* serviceContext) {
}
auto opCtx = tc->makeOperationContext();
+ opCtx->setAlwaysInterruptAtStepDownOrUp();
+
+ DBDirectClient client(opCtx.get());
+ FindCommandRequest findCommand(NamespaceString::kRangeDeletionNamespace);
+ findCommand.setFilter(BSON(RangeDeletionTask::kProcessingFieldName << true));
+ auto cursor = client.find(std::move(findCommand));
+ if (cursor->more()) {
+ return migrationutil::submitRangeDeletionTask(
+ opCtx.get(),
+ RangeDeletionTask::parse(IDLParserErrorContext("rangeDeletionRecovery"),
+ cursor->next()));
+ } else {
+ return ExecutorFuture<void>(getMigrationUtilExecutor(serviceContext));
+ }
+ })
+ .then([serviceContext] {
+ ThreadClient tc("ResubmitRangeDeletions", serviceContext);
+ {
+ stdx::lock_guard<Client> lk(*tc.get());
+ tc->setSystemOperationKillableByStepdown(lk);
+ }
+
+ auto opCtx = tc->makeOperationContext();
+ opCtx->setAlwaysInterruptAtStepDownOrUp();
submitPendingDeletions(opCtx.get());
})
- .getAsync([](const Status& status) {
- if (!status.isOK()) {
- LOGV2(45739,
- "Error while submitting pending range deletions",
- "error"_attr = redact(status));
- }
- });
+ .getAsync([](auto) {});
}
void dropRangeDeletionsCollection(OperationContext* opCtx) {
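As a shell-level illustration of the step-up logic added above (a sketch, not code from this commit): the recovery hook now looks for the one task flagged as "processing" and resubmits it before submitPendingDeletions() picks up the rest. Run directly against a shard primary, the equivalent query is roughly:

    // Sketch: find the task the range deleter was executing before the step up.
    // The field name comes from the range_deletion_task.idl change below; if such
    // a task exists, it is resubmitted first and the remaining tasks follow.
    const inProgress = db.getSiblingDB("config").rangeDeletions.findOne({processing: true});
    if (inProgress !== null) {
        print("Resubmitted first on step up: " + tojson(inProgress));
    }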
diff --git a/src/mongo/db/s/range_deletion_task.idl b/src/mongo/db/s/range_deletion_task.idl
index af7bf53322e..2e47651abdb 100644
--- a/src/mongo/db/s/range_deletion_task.idl
+++ b/src/mongo/db/s/range_deletion_task.idl
@@ -71,6 +71,9 @@ structs:
type: bool
description: "Flag that is present if the range is not yet ready for deletion"
optional: true
+ processing:
+ type: optionalBool
+ description: "Flag that is present if the range is currently being deleted"
whenToClean:
type: CleanWhen
description: "Enumeration that defines when to cleanup the range."
diff --git a/src/mongo/db/s/range_deletion_util.cpp b/src/mongo/db/s/range_deletion_util.cpp
index 99ef78ed220..311973d4bee 100644
--- a/src/mongo/db/s/range_deletion_util.cpp
+++ b/src/mongo/db/s/range_deletion_util.cpp
@@ -320,6 +320,8 @@ ExecutorFuture<void> deleteRangeInBatches(const std::shared_ptr<executor::TaskEx
!collectionUuidHasChanged(
nss, collection.getCollection(), collectionUuid));
+ markAsProcessingRangeDeletionTask(opCtx, migrationId);
+
auto numDeleted = uassertStatusOK(deleteNextBatch(opCtx,
collection.getCollection(),
keyPattern,
@@ -496,6 +498,14 @@ void deleteRangeDeletionTasksForRename(OperationContext* opCtx,
BSON(RangeDeletionTask::kNssFieldName << toNss.ns()));
}
+void markAsProcessingRangeDeletionTask(OperationContext* opCtx, const UUID& migrationId) {
+ PersistentTaskStore<RangeDeletionTask> store(NamespaceString::kRangeDeletionNamespace);
+ auto query = BSON(RangeDeletionTask::kIdFieldName << migrationId);
+ static const auto update =
+ BSON("$set" << BSON(RangeDeletionTask::kProcessingFieldName << true));
+
+ store.update(opCtx, query, update, WriteConcerns::kLocalWriteConcern);
+}
SharedSemiFuture<void> removeDocumentsInRange(
const std::shared_ptr<executor::TaskExecutor>& executor,
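The update performed by the new markAsProcessingRangeDeletionTask() corresponds roughly to the shell operation sketched below; the real code issues it through a PersistentTaskStore, and the writeConcern shown is only an approximation of WriteConcerns::kLocalWriteConcern:

    // Placeholder for the _id (migration id) of the task currently being executed.
    const migrationUuid = UUID("0e4fbc46-7bfe-4e7e-9e31-1f79c0a9a3a4");
    db.getSiblingDB("config").rangeDeletions.updateOne(
        {_id: migrationUuid},
        {$set: {processing: true}},
        {writeConcern: {w: 1}}
    );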
diff --git a/src/mongo/db/s/range_deletion_util.h b/src/mongo/db/s/range_deletion_util.h
index 5d345f8826e..2d537eee8cc 100644
--- a/src/mongo/db/s/range_deletion_util.h
+++ b/src/mongo/db/s/range_deletion_util.h
@@ -113,6 +113,11 @@ void deleteRangeDeletionTasksForRename(OperationContext* opCtx,
const NamespaceString& toNss);
/**
+ * Sets the processing field on the range deletion document for the given migration id.
+ */
+void markAsProcessingRangeDeletionTask(OperationContext* opCtx, const UUID& migrationId);
+
+/**
* Computes and sets the numOrphanDocs field for each document in `config.rangeDeletions` (skips
* documents referring to older incarnations of a collection)
*/