-rw-r--r--  buildscripts/resmokeconfig/suites/sharding_auth.yml                          2
-rw-r--r--  buildscripts/resmokeconfig/suites/sharding_auth_audit.yml                    2
-rw-r--r--  buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml    4
-rw-r--r--  buildscripts/resmokeconfig/suites/sharding_misc.yml                          8
-rw-r--r--  jstests/libs/cleanup_orphaned_util.js                                         6
-rw-r--r--  jstests/sharding/cleanup_orphaned.js                                         19
-rw-r--r--  jstests/sharding/cleanup_orphaned_auth.js                                    56
-rw-r--r--  jstests/sharding/cleanup_orphaned_basic.js                                  124
-rw-r--r--  jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js                   163
-rw-r--r--  jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js            133
-rw-r--r--  jstests/sharding/cleanup_orphaned_cmd_hashed.js                              80
-rw-r--r--  jstests/sharding/cleanup_orphaned_cmd_prereload.js                           98
-rw-r--r--  jstests/sharding/cleanup_orphaned_compound.js                                20
13 files changed, 714 insertions(+), 1 deletion(-)
diff --git a/buildscripts/resmokeconfig/suites/sharding_auth.yml b/buildscripts/resmokeconfig/suites/sharding_auth.yml
index 8cdc924883a..5f361dd80cb 100644
--- a/buildscripts/resmokeconfig/suites/sharding_auth.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_auth.yml
@@ -17,6 +17,8 @@ selector:
# Skip these additional tests when running with auth enabled.
- jstests/sharding/parallel.js
# Skip the testcases that do not have auth bypass when running ops in parallel.
+ - jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js # SERVER-21713
+ - jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js # SERVER-21713
- jstests/sharding/migration_ignore_interrupts_1.js # SERVER-21713
- jstests/sharding/migration_ignore_interrupts_2.js # SERVER-21713
- jstests/sharding/migration_ignore_interrupts_3.js # SERVER-21713
diff --git a/buildscripts/resmokeconfig/suites/sharding_auth_audit.yml b/buildscripts/resmokeconfig/suites/sharding_auth_audit.yml
index 7813b446927..25cf393d692 100644
--- a/buildscripts/resmokeconfig/suites/sharding_auth_audit.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_auth_audit.yml
@@ -17,6 +17,8 @@ selector:
# Skip these additional tests when running with auth enabled.
- jstests/sharding/parallel.js
# Skip the testcases that do not have auth bypass when running ops in parallel.
+ - jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js # SERVER-21713
+ - jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js # SERVER-21713
- jstests/sharding/migration_with_source_ops.js # SERVER-21713
- jstests/sharding/migration_sets_fromMigrate_flag.js # SERVER-21713
- jstests/sharding/migration_ignore_interrupts_1.js # SERVER-21713
diff --git a/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml b/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml
index 7bcbeab0bfb..e23481be51c 100644
--- a/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml
@@ -6,6 +6,7 @@ selector:
exclude_files:
# Auth tests require authentication on the stepdown thread's connection
- jstests/sharding/auth*.js
+ - jstests/sharding/cleanup_orphaned_auth.js
- jstests/sharding/localhostAuthBypass.js
- jstests/sharding/kill_sessions.js
- jstests/sharding/mongos_rs_auth_shard_failure_tolerance.js
@@ -137,6 +138,9 @@ selector:
# ShardingTest is never used, so continuous step down thread never starts
- jstests/sharding/config_rs_change.js
- jstests/sharding/empty_cluster_init.js
+ # Temporarily blacklisted until more robust
+ - jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js
+ - jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js
# Expects same secondaries for entire test
- jstests/sharding/commands_that_write_accept_wc_configRS.js
- jstests/sharding/commands_that_write_accept_wc_shards.js
diff --git a/buildscripts/resmokeconfig/suites/sharding_misc.yml b/buildscripts/resmokeconfig/suites/sharding_misc.yml
index 23f5460dc4d..f7446f41245 100644
--- a/buildscripts/resmokeconfig/suites/sharding_misc.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_misc.yml
@@ -122,6 +122,7 @@ selector:
- jstests/sharding/shard3.js
- jstests/sharding/merge_chunks_test.js
- jstests/sharding/move_stale_mongos.js
+ - jstests/sharding/cleanup_orphaned_basic.js
- jstests/sharding/validate_collection.js
- jstests/sharding/change_stream_enforce_max_time_ms_on_mongos.js
- jstests/sharding/unowned_doc_filtering.js
@@ -173,11 +174,15 @@ selector:
- jstests/sharding/causal_consistency_shell_support.js
- jstests/sharding/change_streams_establishment_finds_new_shards.js
- jstests/sharding/retryable_writes.js
+ - jstests/sharding/cleanup_orphaned_cmd_prereload.js
- jstests/sharding/basic_merge.js
- jstests/sharding/migration_critical_section_concurrency.js
- jstests/sharding/sort1.js
+ - jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js
- jstests/sharding/upsert_sharded.js
+ - jstests/sharding/cleanup_orphaned_cmd_hashed.js
- jstests/sharding/addshard5.js
+ - jstests/sharding/cleanup_orphaned_compound.js
- jstests/sharding/agg_sort.js
- jstests/sharding/remove1.js
- jstests/sharding/shard_targeting.js
@@ -194,6 +199,8 @@ selector:
- jstests/sharding/count2.js
- jstests/sharding/no_empty_reset.js
- jstests/sharding/kill_pinned_cursor.js
+ - jstests/sharding/cleanup_orphaned.js
+ - jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js
- jstests/sharding/sharded_limit_batchsize.js
- jstests/sharding/migration_sets_fromMigrate_flag.js
- jstests/sharding/change_stream_metadata_notifications.js
@@ -301,6 +308,7 @@ selector:
- jstests/sharding/hash_single_shard.js
- jstests/sharding/version2.js
- jstests/sharding/advance_logical_time_with_valid_signature.js
+ - jstests/sharding/cleanup_orphaned_auth.js
- jstests/sharding/mrShardedOutputAuth.js
- jstests/sharding/split_against_shard_with_invalid_split_points.js
- jstests/sharding/version1.js
diff --git a/jstests/libs/cleanup_orphaned_util.js b/jstests/libs/cleanup_orphaned_util.js
index 3990c148df4..a88c9e8dbf7 100644
--- a/jstests/libs/cleanup_orphaned_util.js
+++ b/jstests/libs/cleanup_orphaned_util.js
@@ -36,7 +36,11 @@ function cleanupOrphaned(shardConnection, ns, expectedIterations) {
// keyGen: function() { return [{ a: 'foo', b: 1 }, { a: 'bar', b: 2 }]; }
// }
function testCleanupOrphaned(options) {
- var st = new ShardingTest({shards: 2, mongos: 2});
+ var st = new ShardingTest({
+ shards: 2,
+ mongos: 2,
+ shardOptions: {setParameter: {"disableResumableRangeDeleter": true}}
+ });
var mongos = st.s0, admin = mongos.getDB('admin'),
shards = mongos.getCollection('config.shards').find().toArray(),
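The cleanupOrphaned(shardConnection, ns, expectedIterations) helper named in the hunk header above lives in this same library, but its body is not part of this diff. A rough sketch of what it presumably does, assuming it follows the startingFromKey/stoppedAtKey convention used by the tests below (sketch only, not the actual helper):

    // Sketch only -- not the actual cleanup_orphaned_util.js implementation.
    // Drives cleanupOrphaned until no stoppedAtKey is returned, then checks the
    // number of passes against expectedIterations.
    function cleanupOrphanedSketch(shardConnection, ns, expectedIterations) {
        var admin = shardConnection.getDB('admin');
        var result = admin.runCommand({cleanupOrphaned: ns});
        assert.commandWorked(result);
        var iterations = 1;
        while (result.stoppedAtKey) {
            result = admin.runCommand(
                {cleanupOrphaned: ns, startingFromKey: result.stoppedAtKey});
            assert.commandWorked(result);
            iterations++;
        }
        assert.eq(expectedIterations, iterations);
    }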
diff --git a/jstests/sharding/cleanup_orphaned.js b/jstests/sharding/cleanup_orphaned.js
new file mode 100644
index 00000000000..6359d4be110
--- /dev/null
+++ b/jstests/sharding/cleanup_orphaned.js
@@ -0,0 +1,19 @@
+//
+// Shards data from the key range, then inserts orphan documents, runs cleanupOrphaned,
+// and makes sure that orphans are removed. Uses _id as the shard key.
+//
+// requires_fcv_44 because the 'disableResumableRangeDeleter' parameter was introduced in v4.4.
+// @tags: [requires_fcv_44]
+
+load('./jstests/libs/cleanup_orphaned_util.js');
+
+testCleanupOrphaned({
+ shardKey: {_id: 1},
+ keyGen: function() {
+ var ids = [];
+ for (var i = -50; i < 50; i++) {
+ ids.push({_id: i});
+ }
+ return ids;
+ }
+});
diff --git a/jstests/sharding/cleanup_orphaned_auth.js b/jstests/sharding/cleanup_orphaned_auth.js
new file mode 100644
index 00000000000..8b0fac00f5d
--- /dev/null
+++ b/jstests/sharding/cleanup_orphaned_auth.js
@@ -0,0 +1,56 @@
+//
+// Tests of cleanupOrphaned command permissions.
+//
+
+(function() {
+'use strict';
+
+// TODO SERVER-35447: Multiple users cannot be authenticated on one connection within a session.
+TestData.disableImplicitSessions = true;
+
+function assertUnauthorized(res, msg) {
+ if (assert._debug && msg)
+ print("in assert for: " + msg);
+
+ if (res.ok == 0 &&
+ (res.errmsg.startsWith('not authorized') || res.errmsg.match(/requires authentication/)))
+ return;
+
+ var finalMsg = "command worked when it should have been unauthorized: " + tojson(res);
+ if (msg) {
+ finalMsg += " : " + msg;
+ }
+ doassert(finalMsg);
+}
+
+var st = new ShardingTest({auth: true, other: {keyFile: 'jstests/libs/key1', useHostname: false}});
+
+var shardAdmin = st.shard0.getDB('admin');
+shardAdmin.createUser({user: 'admin', pwd: 'x', roles: ['clusterAdmin', 'userAdminAnyDatabase']});
+shardAdmin.auth('admin', 'x');
+
+var mongos = st.s0;
+var mongosAdmin = mongos.getDB('admin');
+var coll = mongos.getCollection('foo.bar');
+
+mongosAdmin.createUser({user: 'admin', pwd: 'x', roles: ['clusterAdmin', 'userAdminAnyDatabase']});
+mongosAdmin.auth('admin', 'x');
+
+assert.commandWorked(mongosAdmin.runCommand({enableSharding: coll.getDB().getName()}));
+
+assert.commandWorked(
+ mongosAdmin.runCommand({shardCollection: coll.getFullName(), key: {_id: 'hashed'}}));
+
+// cleanupOrphaned requires auth as admin user.
+assert.commandWorked(shardAdmin.logout());
+assertUnauthorized(shardAdmin.runCommand({cleanupOrphaned: 'foo.bar'}));
+
+var fooDB = st.shard0.getDB('foo');
+shardAdmin.auth('admin', 'x');
+fooDB.createUser({user: 'user', pwd: 'x', roles: ['readWrite', 'dbAdmin']});
+shardAdmin.logout();
+fooDB.auth('user', 'x');
+assertUnauthorized(shardAdmin.runCommand({cleanupOrphaned: 'foo.bar'}));
+
+st.stop();
+})();
diff --git a/jstests/sharding/cleanup_orphaned_basic.js b/jstests/sharding/cleanup_orphaned_basic.js
new file mode 100644
index 00000000000..9e785300050
--- /dev/null
+++ b/jstests/sharding/cleanup_orphaned_basic.js
@@ -0,0 +1,124 @@
+//
+// Basic tests of cleanupOrphaned. Validates that disallowed invocations of the
+// cleanupOrphaned command fail.
+//
+// requires_fcv_44 because the 'disableResumableRangeDeleter' parameter was introduced in v4.4.
+// @tags: [requires_fcv_44]
+
+(function() {
+"use strict";
+
+/*****************************************************************************
+ * Unsharded mongod.
+ ****************************************************************************/
+
+// cleanupOrphaned fails against unsharded mongod.
+var mongod = MongoRunner.runMongod();
+assert.commandFailed(mongod.getDB('admin').runCommand({cleanupOrphaned: 'foo.bar'}));
+
+/*****************************************************************************
+ * Bad invocations of cleanupOrphaned command.
+ ****************************************************************************/
+
+var st = new ShardingTest({
+ other: {rs: true, rsOptions: {nodes: 2, setParameter: {"disableResumableRangeDeleter": true}}}
+});
+
+var mongos = st.s0;
+var mongosAdmin = mongos.getDB('admin');
+var dbName = 'foo';
+var collectionName = 'bar';
+var ns = dbName + '.' + collectionName;
+var coll = mongos.getCollection(ns);
+
+// cleanupOrphaned fails against mongos ('no such command'): it must be run
+// on mongod.
+assert.commandFailed(mongosAdmin.runCommand({cleanupOrphaned: ns}));
+
+// cleanupOrphaned must be run on admin DB.
+var shardFooDB = st.shard0.getDB(dbName);
+assert.commandFailed(shardFooDB.runCommand({cleanupOrphaned: ns}));
+
+// Must be run on primary.
+var secondaryAdmin = st.rs0.getSecondary().getDB('admin');
+var response = secondaryAdmin.runCommand({cleanupOrphaned: ns});
+print('cleanupOrphaned on secondary:');
+printjson(response);
+assert.commandFailed(response);
+
+var shardAdmin = st.shard0.getDB('admin');
+var badNS = ' \\/."*<>:|?';
+assert.commandFailed(shardAdmin.runCommand({cleanupOrphaned: badNS}));
+
+// cleanupOrphaned works on sharded collection.
+assert.commandWorked(mongosAdmin.runCommand({enableSharding: coll.getDB().getName()}));
+
+st.ensurePrimaryShard(coll.getDB().getName(), st.shard0.shardName);
+
+assert.commandWorked(mongosAdmin.runCommand({shardCollection: ns, key: {_id: 1}}));
+
+assert.commandWorked(shardAdmin.runCommand({cleanupOrphaned: ns}));
+
+/*****************************************************************************
+ * Empty shard.
+ ****************************************************************************/
+
+// Ping shard1 so it is aware that the collection is sharded. Otherwise cleanupOrphaned
+// may fail.
+assert.commandWorked(mongosAdmin.runCommand({
+ moveChunk: coll.getFullName(),
+ find: {_id: 1},
+ to: st.shard1.shardName,
+ _waitForDelete: true
+}));
+
+assert.commandWorked(mongosAdmin.runCommand({
+ moveChunk: coll.getFullName(),
+ find: {_id: 1},
+ to: st.shard0.shardName,
+ _waitForDelete: true
+}));
+
+// The collection's home is shard0; no chunks are assigned to shard1.
+st.shard1.getCollection(ns).insert({});
+assert.eq(null, st.shard1.getDB(dbName).getLastError());
+assert.eq(1, st.shard1.getCollection(ns).count());
+response = st.shard1.getDB('admin').runCommand({cleanupOrphaned: ns});
+assert.commandWorked(response);
+assert.eq({_id: {$maxKey: 1}}, response.stoppedAtKey);
+assert.eq(
+ 0, st.shard1.getCollection(ns).count(), "cleanupOrphaned didn't delete orphan on empty shard.");
+
+/*****************************************************************************
+ * Bad startingFromKeys.
+ ****************************************************************************/
+
+// startingFromKey of MaxKey.
+response = shardAdmin.runCommand({cleanupOrphaned: ns, startingFromKey: {_id: MaxKey}});
+assert.commandWorked(response);
+assert.eq(null, response.stoppedAtKey);
+
+// startingFromKey doesn't match number of fields in shard key.
+assert.commandFailed(shardAdmin.runCommand(
+ {cleanupOrphaned: ns, startingFromKey: {someKey: 'someValue', someOtherKey: 1}}));
+
+// startingFromKey matches number of fields in shard key but not field names.
+assert.commandFailed(
+ shardAdmin.runCommand({cleanupOrphaned: ns, startingFromKey: {someKey: 'someValue'}}));
+
+var coll2 = mongos.getCollection('foo.baz');
+
+assert.commandWorked(
+ mongosAdmin.runCommand({shardCollection: coll2.getFullName(), key: {a: 1, b: 1}}));
+
+// startingFromKey doesn't match number of fields in shard key.
+assert.commandFailed(shardAdmin.runCommand(
+ {cleanupOrphaned: coll2.getFullName(), startingFromKey: {someKey: 'someValue'}}));
+
+// startingFromKey matches number of fields in shard key but not field names.
+assert.commandFailed(shardAdmin.runCommand(
+ {cleanupOrphaned: coll2.getFullName(), startingFromKey: {a: 'someValue', c: 1}}));
+
+st.stop();
+MongoRunner.stopMongod(mongod);
+})();
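These tests rely on the disableResumableRangeDeleter startup parameter actually reaching the shards via the rsOptions/shardOptions shown above. A minimal sanity check one could add, assuming the parameter is readable through getParameter (illustrative, not part of this patch):

    // Illustrative check: confirm the startup parameter took effect on the
    // shard primary before relying on manual orphan cleanup.
    var paramRes = st.rs0.getPrimary().getDB('admin').runCommand(
        {getParameter: 1, disableResumableRangeDeleter: 1});
    assert.commandWorked(paramRes);
    assert.eq(true, paramRes.disableResumableRangeDeleter);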
diff --git a/jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js b/jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js
new file mode 100644
index 00000000000..6f3ae3ba8ac
--- /dev/null
+++ b/jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js
@@ -0,0 +1,163 @@
+//
+// Tests cleanupOrphaned concurrent with moveChunk.
+// Inserts orphan documents to the donor and recipient shards during the moveChunk and
+// verifies that cleanupOrphaned removes orphans.
+//
+// requires_fcv_44 because the 'disableResumableRangeDeleter' parameter was introduced in v4.4.
+// @tags: [requires_fcv_44]
+
+load('./jstests/libs/chunk_manipulation_util.js');
+load('./jstests/libs/cleanup_orphaned_util.js');
+
+(function() {
+"use strict";
+
+var staticMongod = MongoRunner.runMongod({}); // For startParallelOps.
+var st = new ShardingTest({
+ shards: 2,
+ other: {separateConfig: true},
+ shardOptions: {setParameter: {"disableResumableRangeDeleter": true}}
+});
+
+var mongos = st.s0, admin = mongos.getDB('admin'), dbName = 'foo', ns = dbName + '.bar',
+ coll = mongos.getCollection(ns), donor = st.shard0, recipient = st.shard1,
+ donorColl = donor.getCollection(ns), recipientColl = st.shard1.getCollection(ns);
+
+// Three chunks of 10 documents each, with ids -20, -18, -16, ..., 38.
+// Donor: [minKey, 0) [0, 20)
+// Recipient: [20, maxKey)
+assert.commandWorked(admin.runCommand({enableSharding: dbName}));
+printjson(admin.runCommand({movePrimary: dbName, to: st.shard0.shardName}));
+assert.commandWorked(admin.runCommand({shardCollection: ns, key: {_id: 1}}));
+assert.commandWorked(admin.runCommand({split: ns, middle: {_id: 0}}));
+assert.commandWorked(admin.runCommand({split: ns, middle: {_id: 20}}));
+assert.commandWorked(admin.runCommand(
+ {moveChunk: ns, find: {_id: 20}, to: st.shard1.shardName, _waitForDelete: true}));
+
+jsTest.log('Inserting 20 docs into shard 0....');
+for (var i = -20; i < 20; i += 2) {
+ coll.insert({_id: i});
+}
+assert.eq(null, coll.getDB().getLastError());
+assert.eq(20, donorColl.count());
+
+jsTest.log('Inserting 10 docs into shard 1....');
+for (i = 20; i < 40; i += 2) {
+ coll.insert({_id: i});
+}
+assert.eq(null, coll.getDB().getLastError());
+assert.eq(10, recipientColl.count());
+
+//
+// Start a moveChunk in the background. Move chunk [0, 20), which has 10 docs,
+// from shard 0 to shard 1. Pause it at some points in the donor's and
+// recipient's work flows, and test cleanupOrphaned on shard 0 and shard 1.
+//
+
+jsTest.log('setting failpoint startedMoveChunk (donor) and cloned (recipient)');
+pauseMoveChunkAtStep(donor, moveChunkStepNames.startedMoveChunk);
+pauseMigrateAtStep(recipient, migrateStepNames.cloned);
+var joinMoveChunk = moveChunkParallel(
+ staticMongod, st.s0.host, {_id: 0}, null, coll.getFullName(), st.shard1.shardName);
+
+waitForMoveChunkStep(donor, moveChunkStepNames.startedMoveChunk);
+waitForMigrateStep(recipient, migrateStepNames.cloned);
+// Recipient has run _recvChunkStart and begun its migration thread; docs have
+// been cloned and chunk [0, 20) is noted as 'pending' on recipient.
+
+// Donor: [minKey, 0) [0, 20)
+// Recipient (pending): [0, 20)
+// Recipient: [20, maxKey)
+
+// Create orphans. I'll show an orphaned doc on donor with _id 26 like {26}:
+//
+// Donor: [minKey, 0) [0, 20) {26}
+// Recipient (pending): [0, 20)
+// Recipient: {-1} [20, maxKey)
+donorColl.insert([{_id: 26}]);
+assert.eq(null, donorColl.getDB().getLastError());
+assert.eq(21, donorColl.count());
+recipientColl.insert([{_id: -1}]);
+assert.eq(null, recipientColl.getDB().getLastError());
+assert.eq(21, recipientColl.count());
+
+cleanupOrphaned(donor, ns, 2);
+assert.eq(20, donorColl.count());
+cleanupOrphaned(recipient, ns, 2);
+assert.eq(20, recipientColl.count());
+
+jsTest.log('Inserting document on donor side');
+// Insert a new document (not an orphan) with id 19, which belongs in the
+// [0, 20) chunk.
+donorColl.insert({_id: 19});
+assert.eq(null, coll.getDB().getLastError());
+assert.eq(21, donorColl.count());
+
+// Recipient transfers this modification.
+jsTest.log('Let migrate proceed to transferredMods');
+proceedToMigrateStep(recipient, migrateStepNames.catchup);
+jsTest.log('Done letting migrate proceed to transferredMods');
+
+assert.eq(21, recipientColl.count(), "Recipient didn't transfer inserted document.");
+
+cleanupOrphaned(donor, ns, 2);
+assert.eq(21, donorColl.count());
+cleanupOrphaned(recipient, ns, 2);
+assert.eq(21, recipientColl.count());
+
+// Create orphans.
+donorColl.insert([{_id: 26}]);
+assert.eq(null, donorColl.getDB().getLastError());
+assert.eq(22, donorColl.count());
+recipientColl.insert([{_id: -1}]);
+assert.eq(null, recipientColl.getDB().getLastError());
+assert.eq(22, recipientColl.count());
+
+cleanupOrphaned(donor, ns, 2);
+assert.eq(21, donorColl.count());
+cleanupOrphaned(recipient, ns, 2);
+assert.eq(21, recipientColl.count());
+
+// Recipient has been waiting for donor to call _recvChunkCommit.
+pauseMoveChunkAtStep(donor, moveChunkStepNames.committed);
+unpauseMoveChunkAtStep(donor, moveChunkStepNames.startedMoveChunk);
+proceedToMigrateStep(recipient, migrateStepNames.steady);
+proceedToMigrateStep(recipient, migrateStepNames.done);
+
+// Create orphans.
+donorColl.insert([{_id: 26}]);
+assert.eq(null, donorColl.getDB().getLastError());
+assert.eq(22, donorColl.count());
+recipientColl.insert([{_id: -1}]);
+assert.eq(null, recipientColl.getDB().getLastError());
+assert.eq(22, recipientColl.count());
+
+cleanupOrphaned(donor, ns, 2);
+assert.eq(21, donorColl.count());
+cleanupOrphaned(recipient, ns, 2);
+assert.eq(21, recipientColl.count());
+
+// Let recipient side of the migration finish so that the donor can proceed with the commit.
+unpauseMigrateAtStep(recipient, migrateStepNames.done);
+waitForMoveChunkStep(donor, moveChunkStepNames.committed);
+
+// Donor is paused after the migration chunk commit, but before it finishes the cleanup that
+// includes running the range deleter. Thus it technically has orphaned data -- commit is
+// complete, but moved data is still present. cleanupOrphaned can remove the data the donor
+// would otherwise clean up itself in its post-move delete phase.
+cleanupOrphaned(donor, ns, 2);
+assert.eq(10, donorColl.count());
+
+// Let the donor migration finish.
+unpauseMoveChunkAtStep(donor, moveChunkStepNames.committed);
+joinMoveChunk();
+
+// Donor has finished post-move delete, which had nothing to remove with the range deleter
+// because of the preemptive cleanupOrphaned call.
+assert.eq(10, donorColl.count());
+assert.eq(21, recipientColl.count());
+assert.eq(31, coll.count());
+
+st.stop();
+MongoRunner.stopMongod(staticMongod);
+})();
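The pause/proceed helpers used above (pauseMoveChunkAtStep, proceedToMigrateStep, and friends) come from jstests/libs/chunk_manipulation_util.js, which is not part of this diff. They presumably toggle step-numbered failpoints on the relevant shard, roughly as in this sketch (the failpoint names are assumptions based on the helper names):

    // Sketch only -- not the actual chunk_manipulation_util.js implementation.
    function pauseMoveChunkAtStepSketch(shardConnection, stepNumber) {
        assert.commandWorked(shardConnection.adminCommand(
            {configureFailPoint: 'moveChunkHangAtStep' + stepNumber, mode: 'alwaysOn'}));
    }
    function unpauseMoveChunkAtStepSketch(shardConnection, stepNumber) {
        assert.commandWorked(shardConnection.adminCommand(
            {configureFailPoint: 'moveChunkHangAtStep' + stepNumber, mode: 'off'}));
    }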
diff --git a/jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js b/jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js
new file mode 100644
index 00000000000..2ab59df4a54
--- /dev/null
+++ b/jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js
@@ -0,0 +1,133 @@
+//
+//
+// Tests cleanupOrphaned concurrent with moveChunk with a hashed shard key.
+// Inserts orphan documents to the donor and recipient shards during the moveChunk and
+// verifies that cleanupOrphaned removes orphans.
+//
+// requires_fcv_44 because the 'disableResumableRangeDeleter' parameter was introduced in v4.4.
+// @tags: [requires_fcv_44]
+
+load('./jstests/libs/chunk_manipulation_util.js');
+load('./jstests/libs/cleanup_orphaned_util.js');
+
+(function() {
+"use strict";
+
+var staticMongod = MongoRunner.runMongod({}); // For startParallelOps.
+var st = new ShardingTest({
+ shards: 2,
+ other: {separateConfig: true},
+ shardOptions: {setParameter: {"disableResumableRangeDeleter": true}}
+});
+
+var mongos = st.s0, admin = mongos.getDB('admin'), dbName = 'foo', ns = dbName + '.bar',
+ coll = mongos.getCollection(ns);
+
+assert.commandWorked(admin.runCommand({enableSharding: dbName}));
+printjson(admin.runCommand({movePrimary: dbName, to: st.shard0.shardName}));
+assert.commandWorked(admin.runCommand({shardCollection: ns, key: {key: 'hashed'}}));
+
+// Makes four chunks by default, two on each shard.
+var chunks = st.config.chunks.find({ns: ns}).sort({min: 1}).toArray();
+assert.eq(4, chunks.length);
+
+var chunkWithDoc = chunks[1];
+print('Trying to make doc that hashes to this chunk: ' + tojson(chunkWithDoc));
+
+var found = false;
+for (var i = 0; i < 10000; i++) {
+ var doc = {key: ObjectId()}, hash = mongos.adminCommand({_hashBSONElement: doc.key}).out;
+
+ print('doc.key ' + doc.key + ' hashes to ' + hash);
+
+ if (mongos.getCollection('config.chunks')
+ .findOne({_id: chunkWithDoc._id, 'min.key': {$lte: hash}, 'max.key': {$gt: hash}})) {
+ found = true;
+ break;
+ }
+}
+
+assert(found, "Couldn't make doc that belongs to chunk 1.");
+print('Doc: ' + tojson(doc));
+coll.insert(doc);
+assert.eq(null, coll.getDB().getLastError());
+
+//
+// Start a moveChunk in the background from shard 0 to shard 1. Pause it at
+// some points in the donor's and recipient's work flows, and test
+// cleanupOrphaned.
+//
+
+var donor, recip;
+if (chunkWithDoc.shard == st.shard0.shardName) {
+ donor = st.shard0;
+ recip = st.shard1;
+} else {
+ recip = st.shard0;
+ donor = st.shard1;
+}
+
+jsTest.log('setting failpoint startedMoveChunk (donor) and cloned (recipient)');
+pauseMoveChunkAtStep(donor, moveChunkStepNames.startedMoveChunk);
+pauseMigrateAtStep(recip, migrateStepNames.cloned);
+
+var joinMoveChunk = moveChunkParallel(staticMongod,
+ st.s0.host,
+ null,
+ [chunkWithDoc.min, chunkWithDoc.max], // bounds
+ coll.getFullName(),
+ recip.shardName);
+
+waitForMoveChunkStep(donor, moveChunkStepNames.startedMoveChunk);
+waitForMigrateStep(recip, migrateStepNames.cloned);
+proceedToMigrateStep(recip, migrateStepNames.catchup);
+// recipient has run _recvChunkStart and begun its migration thread;
+// 'doc' has been cloned and chunkWithDoc is noted as 'pending' on recipient.
+
+var donorColl = donor.getCollection(ns), recipColl = recip.getCollection(ns);
+
+assert.eq(1, donorColl.count());
+assert.eq(1, recipColl.count());
+
+// cleanupOrphaned should go through two iterations, since the default chunk
+// setup leaves two unowned ranges on each shard.
+cleanupOrphaned(donor, ns, 2);
+cleanupOrphaned(recip, ns, 2);
+assert.eq(1, donorColl.count());
+assert.eq(1, recipColl.count());
+
+// recip has been waiting for donor to call _recvChunkCommit.
+pauseMoveChunkAtStep(donor, moveChunkStepNames.committed);
+unpauseMoveChunkAtStep(donor, moveChunkStepNames.startedMoveChunk);
+proceedToMigrateStep(recip, migrateStepNames.steady);
+proceedToMigrateStep(recip, migrateStepNames.done);
+
+cleanupOrphaned(donor, ns, 2);
+assert.eq(1, donorColl.count());
+cleanupOrphaned(recip, ns, 2);
+assert.eq(1, recipColl.count());
+
+// Let recip side of the migration finish so that the donor proceeds with the commit.
+unpauseMigrateAtStep(recip, migrateStepNames.done);
+waitForMoveChunkStep(donor, moveChunkStepNames.committed);
+
+// Donor is paused after the migration chunk commit, but before it finishes the cleanup that
+// includes running the range deleter. Thus it technically has orphaned data -- commit is
+// complete, but moved data is still present. cleanupOrphaned can remove the data the donor
+// would otherwise clean up itself in its post-move delete phase.
+cleanupOrphaned(donor, ns, 2);
+assert.eq(0, donorColl.count());
+
+// Let migration thread complete.
+unpauseMoveChunkAtStep(donor, moveChunkStepNames.committed);
+joinMoveChunk();
+
+// donor has finished post-move delete, which had nothing to remove with the range deleter
+// because of the preemptive cleanupOrphaned call.
+assert.eq(0, donorColl.count());
+assert.eq(1, recipColl.count());
+assert.eq(1, coll.count());
+
+MongoRunner.stopMongod(staticMongod);
+st.stop();
+})();
diff --git a/jstests/sharding/cleanup_orphaned_cmd_hashed.js b/jstests/sharding/cleanup_orphaned_cmd_hashed.js
new file mode 100644
index 00000000000..5efa4ad6c40
--- /dev/null
+++ b/jstests/sharding/cleanup_orphaned_cmd_hashed.js
@@ -0,0 +1,80 @@
+//
+// Tests cleanup of orphaned data in hashed sharded coll via the orphaned data cleanup command
+//
+// requires_fcv_44 because the 'disableResumableRangeDeleter' parameter was introduced in v4.4.
+// @tags: [requires_fcv_44]
+
+(function() {
+"use strict";
+
+var st = new ShardingTest(
+ {shards: 2, mongos: 1, shardOptions: {setParameter: {"disableResumableRangeDeleter": true}}});
+
+var mongos = st.s0;
+var admin = mongos.getDB("admin");
+var coll = mongos.getCollection("foo.bar");
+
+assert.commandWorked(admin.runCommand({enableSharding: coll.getDB() + ""}));
+printjson(admin.runCommand({movePrimary: coll.getDB() + "", to: st.shard0.shardName}));
+assert.commandWorked(admin.runCommand({shardCollection: coll + "", key: {_id: "hashed"}}));
+
+// Create two orphaned data holes, one bounded by min or max on each shard
+
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: NumberLong(-100)}}));
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: NumberLong(-50)}}));
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: NumberLong(50)}}));
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: NumberLong(100)}}));
+assert.commandWorked(admin.runCommand({
+ moveChunk: coll + "",
+ bounds: [{_id: NumberLong(-100)}, {_id: NumberLong(-50)}],
+ to: st.shard1.shardName,
+ _waitForDelete: true
+}));
+assert.commandWorked(admin.runCommand({
+ moveChunk: coll + "",
+ bounds: [{_id: NumberLong(50)}, {_id: NumberLong(100)}],
+ to: st.shard0.shardName,
+ _waitForDelete: true
+}));
+st.printShardingStatus();
+
+jsTest.log("Inserting some docs on each shard, so 1/2 will be orphaned...");
+
+for (var s = 0; s < 2; s++) {
+ var shardColl = (s == 0 ? st.shard0 : st.shard1).getCollection(coll + "");
+ var bulk = shardColl.initializeUnorderedBulkOp();
+ for (var i = 0; i < 100; i++)
+ bulk.insert({_id: i});
+ assert.commandWorked(bulk.execute());
+}
+
+assert.eq(200,
+ st.shard0.getCollection(coll + "").find().itcount() +
+ st.shard1.getCollection(coll + "").find().itcount());
+assert.eq(100, coll.find().itcount());
+
+jsTest.log("Cleaning up orphaned data in hashed coll...");
+
+for (var s = 0; s < 2; s++) {
+ var shardAdmin = (s == 0 ? st.shard0 : st.shard1).getDB("admin");
+
+ var result = shardAdmin.runCommand({cleanupOrphaned: coll + ""});
+ while (result.ok && result.stoppedAtKey) {
+ printjson(result);
+ result = shardAdmin.runCommand(
+ {cleanupOrphaned: coll + "", startingFromKey: result.stoppedAtKey});
+ }
+
+ printjson(result);
+ assert(result.ok);
+}
+
+assert.eq(100,
+ st.shard0.getCollection(coll + "").find().itcount() +
+ st.shard1.getCollection(coll + "").find().itcount());
+assert.eq(100, coll.find().itcount());
+
+jsTest.log("DONE!");
+
+st.stop();
+})();
diff --git a/jstests/sharding/cleanup_orphaned_cmd_prereload.js b/jstests/sharding/cleanup_orphaned_cmd_prereload.js
new file mode 100644
index 00000000000..33aa8bca9c8
--- /dev/null
+++ b/jstests/sharding/cleanup_orphaned_cmd_prereload.js
@@ -0,0 +1,98 @@
+//
+// Tests failed cleanup of orphaned data when we have pending chunks
+//
+// requires_fcv_44 because the 'disableResumableRangeDeleter' parameter was introduced in v4.4.
+// @tags: [requires_fcv_44]
+
+var st = new ShardingTest(
+ {shards: 2, shardOptions: {setParameter: {"disableResumableRangeDeleter": true}}});
+
+var mongos = st.s0;
+var admin = mongos.getDB("admin");
+var coll = mongos.getCollection("foo.bar");
+
+assert.commandWorked(admin.runCommand({enableSharding: coll.getDB() + ""}));
+printjson(admin.runCommand({movePrimary: coll.getDB() + "", to: st.shard0.shardName}));
+assert.commandWorked(admin.runCommand({shardCollection: coll + "", key: {_id: 1}}));
+
+// Turn off best-effort recipient metadata refresh post-migration commit on both shards because it
+// would clean up the pending chunks on migration recipients.
+assert.commandWorked(st.shard0.getDB('admin').runCommand(
+ {configureFailPoint: 'doNotRefreshRecipientAfterCommit', mode: 'alwaysOn'}));
+assert.commandWorked(st.shard1.getDB('admin').runCommand(
+ {configureFailPoint: 'doNotRefreshRecipientAfterCommit', mode: 'alwaysOn'}));
+
+jsTest.log("Moving some chunks to shard1...");
+
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 0}}));
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 1}}));
+
+assert.commandWorked(admin.runCommand(
+ {moveChunk: coll + "", find: {_id: 0}, to: st.shard1.shardName, _waitForDelete: true}));
+assert.commandWorked(admin.runCommand(
+ {moveChunk: coll + "", find: {_id: 1}, to: st.shard1.shardName, _waitForDelete: true}));
+
+var metadata =
+ st.shard1.getDB("admin").runCommand({getShardVersion: coll + "", fullMetadata: true}).metadata;
+
+printjson(metadata);
+
+assert.eq(metadata.pending[0][0]._id, 1);
+assert.eq(metadata.pending[0][1]._id, MaxKey);
+
+jsTest.log("Ensuring we won't remove orphaned data in pending chunk...");
+
+assert(!st.shard1.getDB("admin")
+ .runCommand({cleanupOrphaned: coll + "", startingFromKey: {_id: 1}})
+ .stoppedAtKey);
+
+jsTest.log("Moving some chunks back to shard0 after empty...");
+
+assert.commandWorked(admin.runCommand(
+ {moveChunk: coll + "", find: {_id: -1}, to: st.shard1.shardName, _waitForDelete: true}));
+
+var metadata =
+ st.shard0.getDB("admin").runCommand({getShardVersion: coll + "", fullMetadata: true}).metadata;
+
+printjson(metadata);
+
+assert.eq(metadata.shardVersion.t, 0);
+assert.neq(metadata.collVersion.t, 0);
+assert.eq(metadata.pending.length, 0);
+
+assert.commandWorked(admin.runCommand(
+ {moveChunk: coll + "", find: {_id: 1}, to: st.shard0.shardName, _waitForDelete: true}));
+
+var metadata =
+ st.shard0.getDB("admin").runCommand({getShardVersion: coll + "", fullMetadata: true}).metadata;
+
+printjson(metadata);
+assert.eq(metadata.shardVersion.t, 0);
+assert.neq(metadata.collVersion.t, 0);
+assert.eq(metadata.pending[0][0]._id, 1);
+assert.eq(metadata.pending[0][1]._id, MaxKey);
+
+jsTest.log("Ensuring again we won't remove orphaned data in pending chunk...");
+
+assert(!st.shard0.getDB("admin")
+ .runCommand({cleanupOrphaned: coll + "", startingFromKey: {_id: 1}})
+ .stoppedAtKey);
+
+jsTest.log("Checking that pending chunk is promoted on reload...");
+
+assert.eq(null, coll.findOne({_id: 1}));
+
+var metadata =
+ st.shard0.getDB("admin").runCommand({getShardVersion: coll + "", fullMetadata: true}).metadata;
+
+printjson(metadata);
+assert.neq(metadata.shardVersion.t, 0);
+assert.neq(metadata.collVersion.t, 0);
+assert.eq(metadata.chunks[0][0]._id, 1);
+assert.eq(metadata.chunks[0][1]._id, MaxKey);
+
+st.printShardingStatus();
+
+jsTest.log("DONE!");
+
+st.stop();
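The coll.findOne({_id: 1}) call above is what forces shard0 to reload its filtering metadata and promote the pending chunk. A more explicit alternative, assuming the internal _flushRoutingTableCacheUpdates command is available on the shard (illustrative, not part of this patch):

    // Illustrative alternative: force a routing metadata refresh directly
    // instead of relying on a routed read to trigger it.
    assert.commandWorked(st.shard0.getDB('admin').runCommand(
        {_flushRoutingTableCacheUpdates: coll + ""}));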
diff --git a/jstests/sharding/cleanup_orphaned_compound.js b/jstests/sharding/cleanup_orphaned_compound.js
new file mode 100644
index 00000000000..931c08e4387
--- /dev/null
+++ b/jstests/sharding/cleanup_orphaned_compound.js
@@ -0,0 +1,20 @@
+//
+// Shards data from the key range, then inserts orphan documents, runs cleanupOrphaned,
+// and makes sure that orphans are removed. Uses a compound shard key.
+//
+// requires_fcv_44 because the 'disableResumableRangeDeleter' parameter was introduced in v4.4.
+// @tags: [requires_fcv_44]
+
+load('./jstests/libs/cleanup_orphaned_util.js');
+
+testCleanupOrphaned({
+ shardKey: {a: 1, b: 1},
+ keyGen: function() {
+ var ids = [];
+ for (var i = -50; i < 50; i++) {
+ ids.push({a: i, b: Math.random()});
+ }
+
+ return ids;
+ }
+});