author      Misha Tyulenev <misha@mongodb.com>  2015-11-30 19:39:36 -0500
committer   Misha Tyulenev <misha@mongodb.com>  2015-11-30 19:44:23 -0500
commit      6e545bce06f1c5caa8fc3e021d71b19dff393f54 (patch)
tree        920585eebf16d3f6c81a513cde6f9a05206ab090
parent      609fb074f87cc47971f0ae85a6e4eb571b67c129 (diff)
download    mongo-6e545bce06f1c5caa8fc3e021d71b19dff393f54.tar.gz
SERVER-21158 pull cleanupOrphaned tests from QA-326
-rw-r--r--  buildscripts/resmokeconfig/suites/sharding_auth.yml                 2
-rw-r--r--  jstests/libs/chunk_manipulation_util.js                           224
-rw-r--r--  jstests/libs/cleanup_orphaned_util.js                             147
-rw-r--r--  jstests/sharding/cleanup_orphaned.js                               15
-rw-r--r--  jstests/sharding/cleanup_orphaned_auth.js                          59
-rw-r--r--  jstests/sharding/cleanup_orphaned_basic.js                        151
-rw-r--r--  jstests/sharding/cleanup_orphaned_cmd.js                           99
-rw-r--r--  jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js         164
-rw-r--r--  jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js  131
-rw-r--r--  jstests/sharding/cleanup_orphaned_cmd_hashed.js                    28
-rw-r--r--  jstests/sharding/cleanup_orphaned_compound.js                      18
-rw-r--r--  jstests/sharding/pending_chunk.js                                  92
12 files changed, 1018 insertions, 112 deletions
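
For orientation, the new suites share one pattern: pause a chunk migration at a known step with the failpoint helpers added below, insert orphan documents directly on a shard, run cleanupOrphaned, then let the migration finish. The following is a minimal sketch of that flow, condensed from cleanup_orphaned_cmd_during_movechunk.js below; no data is preloaded here, and the real tests assert counts at several more migration steps.

load('./jstests/libs/chunk_manipulation_util.js');
load('./jstests/libs/cleanup_orphaned_util.js');

var staticMongod = MongoRunner.runMongod({});  // needed by startParallelOps
var st = new ShardingTest({shards: 2, mongos: 1});
var admin = st.s0.getDB('admin');
var shards = st.s0.getCollection('config.shards').find().toArray();
var ns = 'foo.bar';

// Chunks: shard 0 owns [minKey, 0) and [0, 20); shard 1 owns [20, maxKey).
assert.commandWorked(admin.runCommand({enableSharding: 'foo'}));
printjson(admin.runCommand({movePrimary: 'foo', to: shards[0]._id}));
assert.commandWorked(admin.runCommand({shardCollection: ns, key: {_id: 1}}));
assert.commandWorked(admin.runCommand({split: ns, middle: {_id: 0}}));
assert.commandWorked(admin.runCommand({split: ns, middle: {_id: 20}}));
assert.commandWorked(admin.runCommand({moveChunk: ns, find: {_id: 20},
                                       to: shards[1]._id, _waitForDelete: true}));

// Start moving chunk [0, 20) and hold both sides at an early step.
pauseMoveChunkAtStep(st.shard0, moveChunkStepNames.startedMoveChunk);
pauseMigrateAtStep(st.shard1, migrateStepNames.cloned);
var joinMoveChunk = moveChunkParallel(
    staticMongod, st.s0.host, {_id: 0}, null, ns, shards[1]._id);
waitForMoveChunkStep(st.shard0, moveChunkStepNames.startedMoveChunk);
waitForMigrateStep(st.shard1, migrateStepNames.cloned);

// {_id: 26} belongs to shard 1's chunk [20, maxKey), so it is an orphan on shard 0.
st.shard0.getCollection(ns).insert({_id: 26});
cleanupOrphaned(st.shard0, ns, 2);  // one unowned range plus the final empty pass
assert.eq(0, st.shard0.getCollection(ns).count());

// Release the failpoints and let the migration complete.
unpauseMoveChunkAtStep(st.shard0, moveChunkStepNames.startedMoveChunk);
unpauseMigrateAtStep(st.shard1, migrateStepNames.cloned);
joinMoveChunk();
st.stop();
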
diff --git a/buildscripts/resmokeconfig/suites/sharding_auth.yml b/buildscripts/resmokeconfig/suites/sharding_auth.yml
index 1e3b8f198a1..608d6926ea4 100644
--- a/buildscripts/resmokeconfig/suites/sharding_auth.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_auth.yml
@@ -20,6 +20,8 @@ selector:
- jstests/sharding/copydb_from_mongos.js # SERVER-13080
- jstests/sharding/parallel.js
- jstests/sharding/sync_cluster_config/sync[36].js
+ - jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js
+ - jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js
executor:
js_test:
diff --git a/jstests/libs/chunk_manipulation_util.js b/jstests/libs/chunk_manipulation_util.js
new file mode 100644
index 00000000000..b1e4d5a90e1
--- /dev/null
+++ b/jstests/libs/chunk_manipulation_util.js
@@ -0,0 +1,224 @@
+//
+// Utilities for testing chunk manipulation: moveChunk, mergeChunks, etc.
+//
+
+load( './jstests/libs/test_background_ops.js' );
+
+//
+// Start a background moveChunk.
+// staticMongod: Server to use for communication, use
+// "MongoRunner.runMongod({})" to make one.
+// mongosURL: Like 'localhost:27017'.
+// findCriteria: Like { _id: 1 }, passed to moveChunk's "find" option.
+// bounds: Array of two documents that specify the lower and upper
+// shard key values of a chunk to move. Specify either the
+// bounds field or the find field but not both.
+// ns: Like 'dbName.collectionName'.
+// toShardId: Like 'shard0001'.
+//
+// Returns a join function; call it to wait for moveChunk to complete.
+//
+
+function moveChunkParallel(
+ staticMongod,
+ mongosURL,
+ findCriteria,
+ bounds,
+ ns,
+ toShardId) {
+
+ assert((findCriteria || bounds) && !(findCriteria && bounds),
+ 'Specify either findCriteria or bounds, but not both.');
+
+ function runMoveChunk(
+ mongosURL,
+ findCriteria,
+ bounds,
+ ns,
+ toShardId) {
+
+ assert(mongosURL && ns && toShardId, 'Missing arguments.');
+ assert((findCriteria || bounds) && !(findCriteria && bounds),
+ 'Specify either findCriteria or bounds, but not both.');
+
+ var mongos = new Mongo( mongosURL ),
+ admin = mongos.getDB( 'admin' ),
+ cmd = { moveChunk : ns };
+
+ if (findCriteria) {
+ cmd.find = findCriteria;
+ } else {
+ cmd.bounds = bounds;
+ }
+
+ cmd.to = toShardId;
+ cmd._waitForDelete = true;
+
+ printjson(cmd);
+ var result = admin.runCommand(cmd);
+ printjson( result );
+ assert( result.ok );
+ }
+
+ // Return the join function.
+ return startParallelOps(
+ staticMongod, runMoveChunk,
+ [ mongosURL, findCriteria, bounds, ns, toShardId ] );
+}
+
+// moveChunk starts at step 0 and proceeds to 1 (it has *finished* parsing
+// options), 2 (it has reloaded config and got distributed lock) and so on.
+var moveChunkStepNames = {
+ parsedOptions: 1,
+ gotDistLock: 2,
+ startedMoveChunk: 3, // called _recvChunkStart on recipient
+ reachedSteadyState: 4, // recipient reports state is "steady"
+ committed: 5,
+ done: 6
+};
+
+function numberToName( names, stepNumber ) {
+ for ( var name in names) {
+ if ( names.hasOwnProperty(name)
+ && names[name] == stepNumber ) {
+ return name;
+ }
+ }
+
+ assert(false);
+}
+
+//
+// Configure a failpoint to make moveChunk hang at a step.
+//
+function pauseMoveChunkAtStep( shardConnection, stepNumber ) {
+ configureMoveChunkFailPoint( shardConnection, stepNumber, 'alwaysOn' );
+}
+
+//
+// Allow moveChunk to proceed past a step.
+//
+function unpauseMoveChunkAtStep( shardConnection, stepNumber ) {
+ configureMoveChunkFailPoint( shardConnection, stepNumber, 'off' );
+}
+
+function proceedToMoveChunkStep( shardConnection, stepNumber ) {
+ jsTest.log( 'moveChunk proceeding from step "'
+ + numberToName( moveChunkStepNames, stepNumber - 1 )
+ + '" to "' + numberToName( moveChunkStepNames, stepNumber )
+ + '".' );
+
+ pauseMoveChunkAtStep( shardConnection, stepNumber );
+ unpauseMoveChunkAtStep( shardConnection, stepNumber - 1 );
+ waitForMoveChunkStep( shardConnection, stepNumber );
+}
+
+
+function configureMoveChunkFailPoint( shardConnection, stepNumber, mode ) {
+ assert( stepNumber >= 1);
+ assert( stepNumber <= 6 );
+ var admin = shardConnection.getDB( 'admin' );
+ admin.runCommand({ configureFailPoint: 'moveChunkHangAtStep' + stepNumber,
+ mode: mode });
+}
+
+//
+// Wait for moveChunk to reach a step (1 through 6). Assumes only one moveChunk
+// is in the shard's currentOp.
+//
+function waitForMoveChunkStep( shardConnection, stepNumber ) {
+ var searchString = 'step ' + stepNumber,
+ admin = shardConnection.getDB( 'admin' );
+
+ assert( stepNumber >= 1);
+ assert( stepNumber <= 6 );
+
+ var msg = (
+ 'moveChunk on ' + shardConnection.shardName
+ + ' never reached step "'
+ + numberToName( moveChunkStepNames, stepNumber )
+ + '".');
+
+ assert.soon( function() {
+ var in_progress = admin.currentOp().inprog;
+ for ( var i = 0; i < in_progress.length; ++i ) {
+ var op = in_progress[i];
+ if ( op.query && op.query.moveChunk ) {
+ return op.msg && op.msg.startsWith( searchString );
+ }
+ }
+
+ return false;
+ }, msg);
+}
+
+var migrateStepNames = {
+ copiedIndexes: 1,
+ deletedPriorDataInRange: 2,
+ cloned: 3,
+ transferredMods: 4, // About to enter steady state.
+ done: 5
+};
+
+//
+// Configure a failpoint to make migration thread hang at a step (1 through 5).
+//
+function pauseMigrateAtStep( shardConnection, stepNumber ) {
+ configureMigrateFailPoint( shardConnection, stepNumber, 'alwaysOn' );
+}
+
+//
+// Allow _recvChunkStart to proceed past a step.
+//
+function unpauseMigrateAtStep( shardConnection, stepNumber ) {
+ configureMigrateFailPoint( shardConnection, stepNumber, 'off' );
+}
+
+function proceedToMigrateStep( shardConnection, stepNumber ) {
+ jsTest.log( 'Migration thread proceeding from step "'
+ + numberToName( migrateStepNames, stepNumber - 1 )
+ + '" to "' + numberToName( migrateStepNames, stepNumber )
+ + '".');
+
+ pauseMigrateAtStep( shardConnection, stepNumber );
+ unpauseMigrateAtStep( shardConnection, stepNumber - 1 );
+ waitForMigrateStep( shardConnection, stepNumber );
+}
+
+function configureMigrateFailPoint( shardConnection, stepNumber, mode ) {
+ assert( stepNumber >= 1);
+ assert( stepNumber <= 5 );
+ var admin = shardConnection.getDB( 'admin' );
+ admin.runCommand({ configureFailPoint: 'migrateThreadHangAtStep' + stepNumber,
+ mode: mode });
+}
+
+//
+// Wait for the migration thread to reach a step (1 through 5).
+//
+function waitForMigrateStep( shardConnection, stepNumber ) {
+ var searchString = 'step ' + stepNumber,
+ admin = shardConnection.getDB( 'admin' );
+
+ assert( stepNumber >= 1);
+ assert( stepNumber <= 5 );
+
+ var msg = (
+ 'Migrate thread on ' + shardConnection.shardName
+ + ' never reached step "'
+ + numberToName( migrateStepNames, stepNumber )
+ + '".');
+
+ assert.soon( function() {
+ // verbose = true so we can see the migration thread.
+ var in_progress = admin.currentOp(true).inprog;
+ for ( var i = 0; i < in_progress.length; ++i ) {
+ var op = in_progress[i];
+ if ( op.desc && op.desc === 'migrateThread' ) {
+ return op.msg.startsWith( searchString );
+ }
+ }
+
+ return false;
+ }, msg);
+}
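
For reference, the pause/unpause helpers above are thin wrappers around the configureFailPoint command built in configureMoveChunkFailPoint and configureMigrateFailPoint. Issued by hand against a shard, pausing and later releasing moveChunk at step 3 would look roughly like this; the connection string is a placeholder.

// Hand-rolled equivalent of pauseMoveChunkAtStep(shard, 3) followed by
// unpauseMoveChunkAtStep(shard, 3); 'localhost:27018' stands in for the
// donor shard's address.
var shard = new Mongo('localhost:27018');
var admin = shard.getDB('admin');
admin.runCommand({configureFailPoint: 'moveChunkHangAtStep3', mode: 'alwaysOn'});
// ... start a moveChunk and make assertions while it hangs at step 3 ...
admin.runCommand({configureFailPoint: 'moveChunkHangAtStep3', mode: 'off'});
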
diff --git a/jstests/libs/cleanup_orphaned_util.js b/jstests/libs/cleanup_orphaned_util.js
new file mode 100644
index 00000000000..ffc13c107d6
--- /dev/null
+++ b/jstests/libs/cleanup_orphaned_util.js
@@ -0,0 +1,147 @@
+//
+// Utilities for testing cleanupOrphaned command.
+//
+
+//
+// Run cleanupOrphaned on a shard, and assert cleanupOrphaned runs the
+// expected number of times before stopping.
+//
+function cleanupOrphaned(shardConnection, ns, expectedIterations) {
+ var admin = shardConnection.getDB('admin'),
+ result = admin.runCommand({cleanupOrphaned: ns}),
+ iterations = 1;
+
+ if (!result.ok) { printjson(result); }
+ assert(result.ok);
+ while (result.stoppedAtKey) {
+ result = admin.runCommand({
+ cleanupOrphaned: ns,
+ startingFromKey: result.stoppedAtKey
+ });
+
+ assert(result.ok);
+ ++iterations;
+ }
+
+ assert.eq(iterations, expectedIterations, 'Expected to run ' +
+ 'cleanupOrphaned ' + expectedIterations + ' times, but it only ran ' +
+ iterations + ' times before stoppedAtKey was null.');
+}
+
+// Shards data across the key range, then inserts orphan documents, runs cleanupOrphaned,
+// and makes sure that the orphans are removed.
+// Pass an options object like:
+// {
+// shardKey: { a: 1, b: 1 },
+// keyGen: function() { return [{ a: 'foo', b: 1 }, { a: 'bar', b: 2 }]; }
+// }
+function testCleanupOrphaned(options) {
+ var st = new ShardingTest({shards: 2, mongos: 2});
+
+ var mongos = st.s0,
+ admin = mongos.getDB('admin'),
+ shards = mongos.getCollection('config.shards').find().toArray(),
+ coll = mongos.getCollection('foo.bar'),
+ shard0Coll = st.shard0.getCollection(coll.getFullName()),
+ keys = options.keyGen(),
+ beginning = keys[0],
+ oneQuarter = keys[Math.round(keys.length / 4)],
+ middle = keys[Math.round(keys.length / 2)],
+ threeQuarters = keys[Math.round(3 * keys.length / 4)];
+
+ assert.commandWorked(admin.runCommand({
+ enableSharding: coll.getDB().getName()
+ }));
+
+ printjson( admin.runCommand({ movePrimary : coll.getDB() + "", to : shards[0]._id }) );
+
+ assert.commandWorked(admin.runCommand({
+ shardCollection: coll.getFullName(),
+ key: options.shardKey
+ }));
+
+ st.printShardingStatus();
+
+ jsTest.log('Inserting some regular docs...');
+
+ assert.commandWorked(admin.runCommand({
+ split: coll.getFullName(),
+ middle: middle
+ }));
+
+ assert.commandWorked(admin.runCommand({
+ moveChunk: coll.getFullName(),
+ find: middle,
+ to: shards[1]._id,
+ _waitForDelete: true
+ }));
+
+ for (var i = 0; i < keys.length; i++) coll.insert(keys[i]);
+ assert.eq(null, coll.getDB().getLastError());
+
+ // Half of the data is on each shard:
+ // shard 0: [beginning, middle)
+ // shard 1: [middle, end)
+ //
+ assert.eq(keys.length / 2, shard0Coll.count());
+ assert.eq(keys.length, coll.find().itcount());
+
+ jsTest.log('Inserting some orphaned docs...');
+
+ shard0Coll.insert(threeQuarters);
+
+ // I'll represent the orphan doc like {threeQuarters}, in this diagram:
+ //
+ // shard 0: [beginning, middle) {threeQuarters}
+ // shard 1: [middle, end)
+ assert.eq(null, shard0Coll.getDB().getLastError());
+ assert.eq(1 + keys.length / 2, shard0Coll.count());
+
+ jsTest.log('Cleaning up orphaned data...');
+
+ cleanupOrphaned(st.shard0, coll.getFullName(), 2);
+ assert.eq(keys.length / 2, shard0Coll.count());
+ assert.eq(keys.length, coll.find().itcount());
+
+ jsTest.log('Moving half the data out again (making a hole)...');
+
+ assert.commandWorked(admin.runCommand({
+ split: coll.getFullName(),
+ middle: oneQuarter
+ }));
+
+ assert.commandWorked(admin.runCommand({
+ moveChunk: coll.getFullName(),
+ find: beginning,
+ to: shards[1]._id,
+ _waitForDelete: true
+ }));
+
+ // 1/4 of the data is on the first shard.
+ // shard 0: [oneQuarter, middle)
+ // shard 1: [beginning, oneQuarter) [middle, end)
+ assert.eq(Math.round(keys.length / 4), shard0Coll.count());
+ assert.eq(keys.length, coll.find().itcount());
+
+ jsTest.log('Inserting some more orphaned docs...');
+
+ shard0Coll.insert(beginning);
+ shard0Coll.insert(middle);
+ assert.eq(null, shard0Coll.getDB().getLastError());
+
+ // shard 0: {beginning} [oneQuarter, middle) {middle}
+ // shard 1: [beginning, oneQuarter) [middle, end)
+ assert.eq(2 + Math.round(keys.length / 4), shard0Coll.count());
+ assert.eq(keys.length, coll.find().itcount());
+
+ jsTest.log('Cleaning up more orphaned data...');
+
+ // Now cleanupOrphaned must iterate over 3 regions, not 2.
+ cleanupOrphaned(st.shard0, coll.getFullName(), 3);
+ assert.eq(Math.round(keys.length / 4), shard0Coll.count());
+ assert.eq(keys.length, coll.find().itcount());
+
+ jsTest.log('DONE!');
+
+ st.stop();
+}
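
The cleanupOrphaned() helper above drives the command's resume protocol: each call cleans at most one unowned range and returns where it stopped in stoppedAtKey, and the caller re-issues the command from that key until stoppedAtKey comes back unset. Run by hand against a shard's admin database, the loop looks roughly like this; 'localhost:27018' and 'foo.bar' are placeholders.

// Manual version of the loop wrapped by cleanupOrphaned() above.
var shardAdmin = new Mongo('localhost:27018').getDB('admin');
var result = shardAdmin.runCommand({cleanupOrphaned: 'foo.bar'});
assert.commandWorked(result);
while (result.stoppedAtKey) {
    result = shardAdmin.runCommand({cleanupOrphaned: 'foo.bar',
                                    startingFromKey: result.stoppedAtKey});
    assert.commandWorked(result);
}
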
diff --git a/jstests/sharding/cleanup_orphaned.js b/jstests/sharding/cleanup_orphaned.js
new file mode 100644
index 00000000000..bbe383b94ce
--- /dev/null
+++ b/jstests/sharding/cleanup_orphaned.js
@@ -0,0 +1,15 @@
+//
+// Shards data across the key range, then inserts orphan documents, runs cleanupOrphaned,
+// and makes sure that the orphans are removed. Uses _id as the shard key.
+//
+
+load('./jstests/libs/cleanup_orphaned_util.js');
+
+testCleanupOrphaned({
+ shardKey: {_id: 1},
+ keyGen: function() {
+ var ids = [];
+ for (var i = -50; i < 50; i++) { ids.push({_id: i}); }
+ return ids;
+ }
+});
diff --git a/jstests/sharding/cleanup_orphaned_auth.js b/jstests/sharding/cleanup_orphaned_auth.js
new file mode 100644
index 00000000000..3b6e3fb21e5
--- /dev/null
+++ b/jstests/sharding/cleanup_orphaned_auth.js
@@ -0,0 +1,59 @@
+//
+// Tests of cleanupOrphaned command permissions.
+//
+
+(function() {
+"use strict";
+
+function assertUnauthorized(res, msg){
+ if (assert._debug && msg) print("in assert for: " + msg);
+
+ if (res.ok == 0 && res.errmsg.startsWith('not authorized'))
+ return;
+
+ var finalMsg = "command worked when it should have been unauthorized: " + tojson(res);
+ if (msg) { finalMsg += " : " + msg; }
+ doassert(finalMsg);
+}
+
+var st = new ShardingTest({
+ auth: true,
+ keyFile: 'jstests/libs/key1',
+ other: {useHostname: false}
+});
+
+var shardAdmin = st.shard0.getDB('admin');
+shardAdmin.createUser({user: 'admin', pwd: 'x', roles: ['clusterAdmin', 'userAdminAnyDatabase']});
+shardAdmin.auth('admin', 'x');
+
+var mongos = st.s0;
+var mongosAdmin = mongos.getDB('admin');
+var coll = mongos.getCollection('foo.bar');
+
+mongosAdmin.createUser({user: 'admin', pwd: 'x', roles: ['clusterAdmin', 'userAdminAnyDatabase']});
+mongosAdmin.auth('admin', 'x');
+
+assert.commandWorked(mongosAdmin.runCommand({
+ enableSharding: coll.getDB().getName()
+}));
+
+assert.commandWorked(mongosAdmin.runCommand({
+ shardCollection: coll.getFullName(),
+ key: {_id: 'hashed'}
+}));
+
+
+// cleanupOrphaned requires auth as admin user.
+assert.commandWorked(shardAdmin.logout());
+assertUnauthorized(shardAdmin.runCommand({cleanupOrphaned: 'foo.bar'}));
+
+var fooDB = st.shard0.getDB('foo');
+shardAdmin.auth('admin', 'x');
+fooDB.createUser({user:'user', pwd:'x', roles:['readWrite', 'dbAdmin']});
+shardAdmin.logout();
+fooDB.auth('user', 'x');
+assertUnauthorized(shardAdmin.runCommand({cleanupOrphaned: 'foo.bar'}));
+
+st.stop();
+
+})()
diff --git a/jstests/sharding/cleanup_orphaned_basic.js b/jstests/sharding/cleanup_orphaned_basic.js
new file mode 100644
index 00000000000..5517376582d
--- /dev/null
+++ b/jstests/sharding/cleanup_orphaned_basic.js
@@ -0,0 +1,151 @@
+//
+// Basic tests of cleanupOrphaned. Validates that disallowed uses of the cleanupOrphaned
+// command fail.
+//
+
+(function() {
+"use strict";
+
+/*****************************************************************************
+ * Unsharded mongod.
+ ****************************************************************************/
+
+// cleanupOrphaned fails against unsharded mongod.
+var mongod = MongoRunner.runMongod();
+assert.commandFailed(mongod.getDB('admin').runCommand({cleanupOrphaned: 'foo.bar'}));
+
+/*****************************************************************************
+ * Bad invocations of cleanupOrphaned command.
+ ****************************************************************************/
+
+var st = new ShardingTest({
+ other: {
+ rs: true,
+ rsOptions: {nodes: 2}
+ }
+});
+
+var mongos = st.s0;
+var shards = mongos.getCollection('config.shards').find().toArray();
+var mongosAdmin = mongos.getDB('admin');
+var dbName = 'foo';
+var collectionName = 'bar';
+var ns = dbName + '.' + collectionName;
+var coll = mongos.getCollection(ns);
+
+// cleanupOrphaned fails against mongos ('no such command'): it must be run
+// on mongod.
+assert.commandFailed(mongosAdmin.runCommand({cleanupOrphaned: ns}));
+
+// cleanupOrphaned must be run on admin DB.
+var shardFooDB = st.shard0.getDB(dbName);
+assert.commandFailed(shardFooDB.runCommand({cleanupOrphaned: ns}));
+
+// Must be run on primary.
+var secondaryAdmin = st.rs0.getSecondary().getDB('admin');
+var response = secondaryAdmin.runCommand({cleanupOrphaned: ns});
+print('cleanupOrphaned on secondary:');
+printjson(response);
+assert.commandFailed(response);
+
+var shardAdmin = st.shard0.getDB('admin');
+var badNS = ' \\/."*<>:|?';
+assert.commandFailed(shardAdmin.runCommand({cleanupOrphaned: badNS}));
+
+/*****************************************************************************
+ * Unsharded namespaces.
+ ****************************************************************************/
+
+// cleanupOrphaned fails on unsharded database.
+assert.commandFailed(shardAdmin.runCommand({cleanupOrphaned: ns}));
+
+// cleanupOrphaned works on sharded collection.
+assert.commandWorked(mongosAdmin.runCommand({
+ enableSharding: coll.getDB().getName()
+}));
+
+st.ensurePrimaryShard(coll.getDB().getName(), shards[0]._id);
+
+assert.commandWorked(mongosAdmin.runCommand({
+ shardCollection: ns,
+ key: {_id: 1}
+}));
+
+assert.commandWorked(shardAdmin.runCommand({cleanupOrphaned: ns}));
+
+/*****************************************************************************
+ * Empty shard.
+ ****************************************************************************/
+
+// Move a chunk to shards[1] and back so it is aware that the collection is sharded;
+// otherwise cleanupOrphaned may fail.
+assert.commandWorked(mongosAdmin.runCommand({
+ moveChunk: coll.getFullName(),
+ find: {_id: 1},
+ to: shards[1]._id
+}));
+
+assert.commandWorked(mongosAdmin.runCommand({
+ moveChunk: coll.getFullName(),
+ find: {_id: 1},
+ to: shards[0]._id
+}));
+
+// Collection's home is shard0, there are no chunks assigned to shard1.
+st.shard1.getCollection(ns).insert({});
+assert.eq(null, st.shard1.getDB(dbName).getLastError());
+assert.eq(1, st.shard1.getCollection(ns).count());
+response = st.shard1.getDB('admin').runCommand({cleanupOrphaned: ns});
+assert.commandWorked(response);
+assert.eq({_id: {$maxKey:1}}, response.stoppedAtKey);
+assert.eq(
+ 0, st.shard1.getCollection(ns).count(),
+ "cleanupOrphaned didn't delete orphan on empty shard.");
+
+/*****************************************************************************
+ * Bad startingFromKeys.
+ ****************************************************************************/
+
+// startingFromKey of MaxKey.
+response = shardAdmin.runCommand({
+ cleanupOrphaned: ns,
+ startingFromKey: {_id: MaxKey}
+});
+assert.commandWorked(response);
+assert.eq(null, response.stoppedAtKey);
+
+// startingFromKey doesn't match number of fields in shard key.
+assert.commandFailed(shardAdmin.runCommand({
+ cleanupOrphaned: ns,
+ startingFromKey: {someKey: 'someValue', someOtherKey: 1}
+}));
+
+// startingFromKey matches number of fields in shard key but not field names.
+assert.commandFailed(shardAdmin.runCommand({
+ cleanupOrphaned: ns,
+ startingFromKey: {someKey: 'someValue'}
+}));
+
+var coll2 = mongos.getCollection('foo.baz');
+
+assert.commandWorked(mongosAdmin.runCommand({
+ shardCollection: coll2.getFullName(),
+ key: {a:1, b:1}
+}));
+
+
+// startingFromKey doesn't match number of fields in shard key.
+assert.commandFailed(shardAdmin.runCommand({
+ cleanupOrphaned: coll2.getFullName(),
+ startingFromKey: {someKey: 'someValue'}
+}));
+
+// startingFromKey matches number of fields in shard key but not field names.
+assert.commandFailed(shardAdmin.runCommand({
+ cleanupOrphaned: coll2.getFullName(),
+ startingFromKey: {a: 'someValue', c: 1}
+}));
+
+st.stop();
+
+})()
diff --git a/jstests/sharding/cleanup_orphaned_cmd.js b/jstests/sharding/cleanup_orphaned_cmd.js
deleted file mode 100644
index deca5a19bc8..00000000000
--- a/jstests/sharding/cleanup_orphaned_cmd.js
+++ /dev/null
@@ -1,99 +0,0 @@
-//
-// Tests cleanup of orphaned data via the orphaned data cleanup command
-//
-
-var st = new ShardingTest({ shards: 2 });
-st.stopBalancer();
-
-var mongos = st.s0;
-var admin = mongos.getDB( "admin" );
-var shards = mongos.getCollection( "config.shards" ).find().toArray();
-var coll = mongos.getCollection( "foo.bar" );
-
-assert( admin.runCommand({ enableSharding : coll.getDB() + "" }).ok );
-printjson( admin.runCommand({ movePrimary : coll.getDB() + "", to : shards[0]._id }) );
-assert( admin.runCommand({ shardCollection : coll + "", key : { _id : 1 } }).ok );
-assert( admin.runCommand({ split : coll + "", middle : { _id : 0 } }).ok );
-assert( admin.runCommand({ moveChunk : coll + "",
- find : { _id : 0 },
- to : shards[1]._id,
- _waitForDelete : true }).ok );
-
-st.printShardingStatus();
-
-jsTest.log( "Inserting some regular docs..." );
-
-var bulk = coll.initializeUnorderedBulkOp();
-for ( var i = -50; i < 50; i++ ) bulk.insert({ _id : i });
-assert.writeOK( bulk.execute() );
-
-// Half of the data is on each shard
-
-jsTest.log( "Inserting some orphaned docs..." );
-
-var shard0Coll = st.shard0.getCollection( coll + "" );
-assert.writeOK( shard0Coll.insert({ _id : 10 }));
-
-assert.neq( 50, shard0Coll.count() );
-assert.eq( 100, coll.find().itcount() );
-
-jsTest.log( "Cleaning up orphaned data..." );
-
-var shard0Admin = st.shard0.getDB( "admin" );
-var result = shard0Admin.runCommand({ cleanupOrphaned : coll + "" });
-while ( result.ok && result.stoppedAtKey ) {
- printjson( result );
- result = shard0Admin.runCommand({ cleanupOrphaned : coll + "",
- startingFromKey : result.stoppedAtKey });
-}
-
-printjson( result );
-assert( result.ok );
-assert.eq( 50, shard0Coll.count() );
-assert.eq( 100, coll.find().itcount() );
-
-jsTest.log( "Moving half the data out again (making a hole)..." );
-
-assert( admin.runCommand({ split : coll + "", middle : { _id : -35 } }).ok );
-assert( admin.runCommand({ split : coll + "", middle : { _id : -10 } }).ok );
-// Make sure we wait for the deletion here, otherwise later cleanup could fail
-assert( admin.runCommand({ moveChunk : coll + "",
- find : { _id : -35 },
- to : shards[1]._id,
- _waitForDelete : true }).ok );
-
-// 1/4 the data is on the first shard
-
-jsTest.log( "Inserting some more orphaned docs..." );
-
-st.printShardingStatus();
-
-var shard0Coll = st.shard0.getCollection( coll + "" );
-assert.writeOK(shard0Coll.insert({ _id : -35 }));
-assert.writeOK(shard0Coll.insert({ _id : -11 }));
-assert.writeOK(shard0Coll.insert({ _id : 0 }));
-assert.writeOK(shard0Coll.insert({ _id : 10 }));
-
-assert.neq( 25, shard0Coll.count() );
-assert.eq( 100, coll.find().itcount() );
-
-jsTest.log( "Cleaning up more orphaned data..." );
-
-var shard0Admin = st.shard0.getDB( "admin" );
-var result = shard0Admin.runCommand({ cleanupOrphaned : coll + "",
- secondaryThrottle: true,
- writeConcern: { w: 1 }});
-while ( result.ok && result.stoppedAtKey ) {
- printjson( result );
- result = shard0Admin.runCommand({ cleanupOrphaned : coll + "",
- startingFromKey : result.stoppedAtKey });
-}
-
-printjson( result );
-assert( result.ok );
-assert.eq( 25, shard0Coll.count() );
-assert.eq( 100, coll.find().itcount() );
-
-jsTest.log( "DONE!" );
-
-st.stop();
diff --git a/jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js b/jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js
new file mode 100644
index 00000000000..54a75c78733
--- /dev/null
+++ b/jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js
@@ -0,0 +1,164 @@
+//
+// Tests cleanupOrphaned concurrent with moveChunk.
+// Inserts orphan documents into the donor and recipient shards during the moveChunk and
+// verifies that cleanupOrphaned removes orphans.
+//
+
+load('./jstests/libs/chunk_manipulation_util.js');
+load('./jstests/libs/cleanup_orphaned_util.js');
+
+(function() {
+"use strict";
+
+var staticMongod = MongoRunner.runMongod({}); // For startParallelOps.
+var st = new ShardingTest({shards: 2, mongos: 1,
+ other: {separateConfig: true, shardOptions: {verbose: 0}}});
+
+var mongos = st.s0,
+ admin = mongos.getDB('admin'),
+ shards = mongos.getCollection('config.shards').find().toArray(),
+ dbName = 'foo',
+ ns = dbName + '.bar',
+ coll = mongos.getCollection(ns),
+ donor = st.shard0,
+ recipient = st.shard1,
+ donorColl = donor.getCollection(ns),
+ recipientColl = st.shard1.getCollection(ns);
+
+// Three chunks of 10 documents each, with ids -20, -18, -16, ..., 38.
+// Donor: [minKey, 0) [0, 20)
+// Recipient: [20, maxKey)
+assert.commandWorked( admin.runCommand({enableSharding: dbName}) );
+printjson( admin.runCommand({movePrimary: dbName, to: shards[0]._id}) );
+assert.commandWorked( admin.runCommand({shardCollection: ns, key: {_id: 1}}) );
+assert.commandWorked( admin.runCommand({split: ns, middle: {_id: 0}}) );
+assert.commandWorked( admin.runCommand({split: ns, middle: {_id: 20}}) );
+assert.commandWorked( admin.runCommand({moveChunk: ns,
+ find: {_id: 20},
+ to: shards[1]._id,
+ _waitForDelete: true}) );
+
+jsTest.log('Inserting 20 docs into shard 0....');
+for (var i = -20; i < 20; i += 2) coll.insert({_id: i});
+assert.eq(null, coll.getDB().getLastError());
+assert.eq(20, donorColl.count());
+
+jsTest.log('Inserting 10 docs into shard 1....');
+for (i = 20; i < 40; i += 2) coll.insert({_id: i});
+assert.eq(null, coll.getDB().getLastError());
+assert.eq(10, recipientColl.count());
+
+//
+// Start a moveChunk in the background. Move chunk [0, 20), which has 10 docs,
+// from shard 0 to shard 1. Pause it at some points in the donor's and
+// recipient's work flows, and test cleanupOrphaned on shard 0 and shard 1.
+//
+
+jsTest.log('setting failpoint startedMoveChunk');
+pauseMoveChunkAtStep(donor, moveChunkStepNames.startedMoveChunk);
+pauseMigrateAtStep(recipient, migrateStepNames.cloned);
+var joinMoveChunk = moveChunkParallel(
+ staticMongod,
+ st.s0.host,
+ {_id: 0},
+ null,
+ coll.getFullName(),
+ shards[1]._id);
+
+waitForMoveChunkStep(donor, moveChunkStepNames.startedMoveChunk);
+waitForMigrateStep(recipient, migrateStepNames.cloned);
+// Recipient has run _recvChunkStart and begun its migration thread; docs have
+// been cloned and chunk [0, 20) is noted as 'pending' on recipient.
+
+// Donor: [minKey, 0) [0, 20)
+// Recipient (pending): [0, 20)
+// Recipient: [20, maxKey)
+
+// Create orphans. I'll show an orphaned doc on donor with _id 26 like {26}:
+//
+// Donor: [minKey, 0) [0, 20) {26}
+// Recipient (pending): [0, 20)
+// Recipient: {-1} [20, maxKey)
+donorColl.insert([{_id: 26}]);
+assert.eq(null, donorColl.getDB().getLastError());
+assert.eq(21, donorColl.count());
+recipientColl.insert([{_id: -1}]);
+assert.eq(null, recipientColl.getDB().getLastError());
+assert.eq(21, recipientColl.count());
+
+cleanupOrphaned(donor, ns, 2);
+assert.eq(20, donorColl.count());
+cleanupOrphaned(recipient, ns, 2);
+assert.eq(20, recipientColl.count());
+
+jsTest.log('Inserting document on donor side');
+// Insert a new document (not an orphan) with _id 19, which belongs in the
+// [0, 20) chunk.
+donorColl.insert({_id: 19});
+assert.eq(null, coll.getDB().getLastError());
+assert.eq(21, donorColl.count());
+
+// Recipient transfers this modification.
+jsTest.log('Let migrate proceed to transferredMods');
+pauseMigrateAtStep(recipient, migrateStepNames.transferredMods);
+unpauseMigrateAtStep(recipient, migrateStepNames.cloned);
+waitForMigrateStep(recipient, migrateStepNames.transferredMods);
+jsTest.log('Done letting migrate proceed to transferredMods');
+
+assert.eq(
+ 21, recipientColl.count(),
+ "Recipient didn't transfer inserted document.");
+
+cleanupOrphaned(donor, ns, 2);
+assert.eq(21, donorColl.count());
+cleanupOrphaned(recipient, ns, 2);
+assert.eq(21, recipientColl.count());
+
+// Create orphans.
+donorColl.insert([{_id: 26}]);
+assert.eq(null, donorColl.getDB().getLastError());
+assert.eq(22, donorColl.count());
+recipientColl.insert([{_id: -1}]);
+assert.eq(null, recipientColl.getDB().getLastError());
+assert.eq(22, recipientColl.count());
+
+cleanupOrphaned(donor, ns, 2);
+assert.eq(21, donorColl.count());
+cleanupOrphaned(recipient, ns, 2);
+assert.eq(21, recipientColl.count());
+
+// Recipient has been waiting for donor to call _recvChunkCommit.
+pauseMoveChunkAtStep(donor, moveChunkStepNames.committed);
+unpauseMoveChunkAtStep(donor, moveChunkStepNames.startedMoveChunk);
+unpauseMigrateAtStep(recipient, migrateStepNames.transferredMods);
+proceedToMigrateStep(recipient, migrateStepNames.done);
+
+// Create orphans.
+donorColl.insert([{_id: 26}]);
+assert.eq(null, donorColl.getDB().getLastError());
+assert.eq(22, donorColl.count());
+recipientColl.insert([{_id: -1}]);
+assert.eq(null, recipientColl.getDB().getLastError());
+assert.eq(22, recipientColl.count());
+
+// The move has committed, so cleanupOrphaned on the donor also removes the migrated documents.
+cleanupOrphaned(donor, ns, 2);
+assert.eq(10, donorColl.count());
+cleanupOrphaned(recipient, ns, 2);
+assert.eq(21, recipientColl.count());
+
+// Let migration thread complete.
+unpauseMigrateAtStep(recipient, migrateStepNames.done);
+unpauseMoveChunkAtStep(donor, moveChunkStepNames.committed);
+joinMoveChunk();
+
+// Donor has finished post-move delete.
+cleanupOrphaned(donor, ns, 2); // this is necessary for the count to not be 11
+assert.eq(10, donorColl.count());
+assert.eq(21, recipientColl.count());
+assert.eq(31, coll.count());
+
+jsTest.log('DONE!');
+st.stop();
+
+})()
diff --git a/jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js b/jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js
new file mode 100644
index 00000000000..a42d5125aa4
--- /dev/null
+++ b/jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js
@@ -0,0 +1,131 @@
+//
+//
+// Tests cleanupOrphaned concurrent with moveChunk with a hashed shard key.
+// Inserts orphan documents into the donor and recipient shards during the moveChunk and
+// verifies that cleanupOrphaned removes orphans.
+//
+
+load('./jstests/libs/chunk_manipulation_util.js');
+load('./jstests/libs/cleanup_orphaned_util.js');
+
+(function() {
+"use strict";
+
+var staticMongod = MongoRunner.runMongod({}); // For startParallelOps.
+var st = new ShardingTest({shards: 2, mongos: 1,
+ other: {separateConfig: true, shardOptions: {verbose: 0}}});
+
+var mongos = st.s0,
+ admin = mongos.getDB('admin'),
+ shards = mongos.getCollection('config.shards').find().toArray(),
+ dbName = 'foo',
+ ns = dbName + '.bar',
+ coll = mongos.getCollection(ns);
+
+assert.commandWorked( admin.runCommand({enableSharding: dbName}) );
+printjson(admin.runCommand({movePrimary: dbName, to: shards[0]._id}));
+assert.commandWorked( admin.runCommand({shardCollection: ns, key: {key: 'hashed'}}) );
+
+// Makes four chunks by default, two on each shard.
+var chunks = st.config.chunks.find().sort({min: 1}).toArray();
+assert.eq(4, chunks.length);
+
+var chunkWithDoc = chunks[1];
+print('Trying to make doc that hashes to this chunk: '
+ + tojson(chunkWithDoc));
+
+var found = false;
+for (var i = 0; i < 10000; i++) {
+ var doc = {key: ObjectId()},
+ hash = mongos.adminCommand({_hashBSONElement: doc.key}).out;
+
+ print('doc.key ' + doc.key + ' hashes to ' + hash);
+
+ if (mongos.getCollection('config.chunks').findOne({
+ _id: chunkWithDoc._id,
+ 'min.key': {$lte: hash},
+ 'max.key': {$gt: hash}
+ })) {
+ found = true;
+ break;
+ }
+}
+
+assert(found, "Couldn't make doc that belongs to chunk 1.");
+print('Doc: ' + tojson(doc));
+coll.insert(doc);
+assert.eq(null, coll.getDB().getLastError());
+
+//
+// Start a moveChunk in the background from shard 0 to shard 1. Pause it at
+// some points in the donor's and recipient's work flows, and test
+// cleanupOrphaned.
+//
+
+var donor, recip;
+if (chunkWithDoc.shard == st.shard0.shardName) {
+ donor = st.shard0;
+ recip = st.shard1;
+} else {
+ recip = st.shard0;
+ donor = st.shard1;
+}
+
+jsTest.log('setting failpoint startedMoveChunk');
+pauseMoveChunkAtStep(donor, moveChunkStepNames.startedMoveChunk);
+pauseMigrateAtStep(recip, migrateStepNames.cloned);
+
+var joinMoveChunk = moveChunkParallel(
+ staticMongod,
+ st.s0.host,
+ null,
+ [chunkWithDoc.min, chunkWithDoc.max], // bounds
+ coll.getFullName(),
+ recip.shardName);
+
+waitForMoveChunkStep(donor, moveChunkStepNames.startedMoveChunk);
+waitForMigrateStep(recip, migrateStepNames.cloned);
+proceedToMigrateStep(recip, migrateStepNames.transferredMods);
+// recipient has run _recvChunkStart and begun its migration thread;
+// 'doc' has been cloned and chunkWithDoc is noted as 'pending' on recipient.
+
+var donorColl = donor.getCollection(ns),
+ recipColl = recip.getCollection(ns);
+
+assert.eq(1, donorColl.count());
+assert.eq(1, recipColl.count());
+
+// cleanupOrphaned should go through two iterations, since the default chunk
+// setup leaves two unowned ranges on each shard.
+cleanupOrphaned(donor, ns, 2);
+cleanupOrphaned(recip, ns, 2);
+assert.eq(1, donorColl.count());
+assert.eq(1, recipColl.count());
+
+// recip has been waiting for donor to call _recvChunkCommit.
+pauseMoveChunkAtStep(donor, moveChunkStepNames.committed);
+unpauseMoveChunkAtStep(donor, moveChunkStepNames.startedMoveChunk);
+unpauseMigrateAtStep(recip, migrateStepNames.transferredMods);
+proceedToMigrateStep(recip, migrateStepNames.done);
+
+// cleanupOrphaned removes migrated data from donor. The donor would
+// otherwise clean them up itself, in the post-move delete phase.
+cleanupOrphaned(donor, ns, 2);
+assert.eq(0, donorColl.count());
+cleanupOrphaned(recip, ns, 2);
+assert.eq(1, recipColl.count());
+
+// Let migration thread complete.
+unpauseMigrateAtStep(recip, migrateStepNames.done);
+unpauseMoveChunkAtStep(donor, moveChunkStepNames.committed);
+joinMoveChunk();
+
+// donor has finished post-move delete.
+assert.eq(0, donorColl.count());
+assert.eq(1, recipColl.count());
+assert.eq(1, coll.count());
+
+jsTest.log('DONE!');
+st.stop();
+
+})()
diff --git a/jstests/sharding/cleanup_orphaned_cmd_hashed.js b/jstests/sharding/cleanup_orphaned_cmd_hashed.js
index 8ede0c0aaa0..3a88fe49f46 100644
--- a/jstests/sharding/cleanup_orphaned_cmd_hashed.js
+++ b/jstests/sharding/cleanup_orphaned_cmd_hashed.js
@@ -3,34 +3,34 @@
// @tags : [ hashed ]
//
-var options = { shardOptions : { verbose : 2 } };
+(function() {
+"use strict";
-var st = new ShardingTest({ shards : 2, mongos : 1, other : options });
-st.stopBalancer();
+var st = new ShardingTest({ shards : 2, mongos : 1, other : { shardOptions : { verbose : 2 } } });
var mongos = st.s0;
var admin = mongos.getDB( "admin" );
var shards = mongos.getCollection( "config.shards" ).find().toArray();
var coll = mongos.getCollection( "foo.bar" );
-assert( admin.runCommand({ enableSharding : coll.getDB() + "" }).ok );
+assert.commandWorked( admin.runCommand({ enableSharding : coll.getDB() + "" }) );
printjson( admin.runCommand({ movePrimary : coll.getDB() + "", to : shards[0]._id }) );
-assert( admin.runCommand({ shardCollection : coll + "", key : { _id : "hashed" } }).ok );
+assert.commandWorked( admin.runCommand({ shardCollection : coll + "", key : { _id : "hashed" } }) );
// Create two orphaned data holes, one bounded by min or max on each shard
-assert( admin.runCommand({ split : coll + "", middle : { _id : NumberLong(-100) } }).ok );
-assert( admin.runCommand({ split : coll + "", middle : { _id : NumberLong(-50) } }).ok );
-assert( admin.runCommand({ split : coll + "", middle : { _id : NumberLong(50) } }).ok );
-assert( admin.runCommand({ split : coll + "", middle : { _id : NumberLong(100) } }).ok );
-assert( admin.runCommand({ moveChunk : coll + "", bounds : [{ _id : NumberLong(-100) },
+assert.commandWorked( admin.runCommand({ split : coll + "", middle : { _id : NumberLong(-100) } }) );
+assert.commandWorked( admin.runCommand({ split : coll + "", middle : { _id : NumberLong(-50) } }) );
+assert.commandWorked( admin.runCommand({ split : coll + "", middle : { _id : NumberLong(50) } }) );
+assert.commandWorked( admin.runCommand({ split : coll + "", middle : { _id : NumberLong(100) } }) );
+assert.commandWorked( admin.runCommand({ moveChunk : coll + "", bounds : [{ _id : NumberLong(-100) },
{ _id : NumberLong(-50) }],
to : shards[1]._id,
- _waitForDelete : true }).ok );
-assert( admin.runCommand({ moveChunk : coll + "", bounds : [{ _id : NumberLong(50) },
+ _waitForDelete : true }) );
+assert.commandWorked( admin.runCommand({ moveChunk : coll + "", bounds : [{ _id : NumberLong(50) },
{ _id : NumberLong(100) }],
to : shards[0]._id,
- _waitForDelete : true }).ok );
+ _waitForDelete : true }) );
st.printShardingStatus();
jsTest.log( "Inserting some docs on each shard, so 1/2 will be orphaned..." );
@@ -69,3 +69,5 @@ assert.eq( 100, coll.find().itcount() );
jsTest.log( "DONE!" );
st.stop();
+
+})()
diff --git a/jstests/sharding/cleanup_orphaned_compound.js b/jstests/sharding/cleanup_orphaned_compound.js
new file mode 100644
index 00000000000..ebf7163c77d
--- /dev/null
+++ b/jstests/sharding/cleanup_orphaned_compound.js
@@ -0,0 +1,18 @@
+//
+// Shards data across the key range, then inserts orphan documents, runs cleanupOrphaned,
+// and makes sure that the orphans are removed. Uses a compound shard key.
+//
+
+load('./jstests/libs/cleanup_orphaned_util.js');
+
+testCleanupOrphaned({
+ shardKey: {a: 1, b: 1},
+ keyGen: function() {
+ var ids = [];
+ for (var i = -50; i < 50; i++) {
+ ids.push({a: i, b: Math.random()});
+ }
+
+ return ids;
+ }
+});
diff --git a/jstests/sharding/pending_chunk.js b/jstests/sharding/pending_chunk.js
new file mode 100644
index 00000000000..b40b4e3fe48
--- /dev/null
+++ b/jstests/sharding/pending_chunk.js
@@ -0,0 +1,92 @@
+//
+// Tests pending chunk metadata.
+//
+
+(function() {
+"use strict";
+
+var st = new ShardingTest({shards: 2, mongos: 2,
+ other: {separateConfig: true, shardOptions: {verbose: 2}}});
+
+var mongos = st.s0;
+var admin = mongos.getDB('admin');
+var shards = mongos.getCollection('config.shards').find().toArray();
+var coll = mongos.getCollection('foo.bar');
+var ns = coll.getFullName();
+var dbName = coll.getDB().getName();
+var shard0 = st.shard0, shard1 = st.shard1;
+
+assert.commandWorked(admin.runCommand({enableSharding: dbName}));
+printjson(admin.runCommand({movePrimary: dbName, to: shards[0]._id}));
+assert.commandWorked(admin.runCommand({shardCollection: ns, key: {_id: 1}}));
+
+jsTest.log('Moving some chunks to shard1...');
+
+assert.commandWorked(admin.runCommand({split: ns, middle: {_id: 0}}));
+assert.commandWorked(admin.runCommand({split: ns, middle: {_id: 1}}));
+assert.commandWorked(admin.runCommand({moveChunk: ns,
+ find: {_id: 0},
+ to: shards[1]._id,
+ _waitForDelete: true}));
+assert.commandWorked(admin.runCommand({moveChunk: ns,
+ find: {_id: 1},
+ to: shards[1]._id,
+ _waitForDelete: true}));
+
+
+function getMetadata(shard) {
+ var admin = shard.getDB('admin'),
+ metadata = admin.runCommand({
+ getShardVersion: ns, fullMetadata: true
+ }).metadata;
+
+ jsTest.log('Got metadata: ' + tojson(metadata));
+ return metadata;
+}
+
+var metadata = getMetadata(shard1);
+assert.eq(metadata.pending[0][0]._id, 1);
+assert.eq(metadata.pending[0][1]._id, MaxKey);
+
+jsTest.log('Moving some chunks back to shard0 after empty...');
+
+assert.commandWorked(admin.runCommand({moveChunk: ns,
+ find: {_id: -1},
+ to: shards[1]._id,
+ _waitForDelete: true}));
+
+metadata = getMetadata(shard0);
+assert.eq(metadata.shardVersion.t, 0);
+assert.neq(metadata.collVersion.t, 0);
+assert.eq(metadata.pending.length, 0);
+
+assert.commandWorked(admin.runCommand({moveChunk: ns,
+ find: {_id: 1},
+ to: shards[0]._id,
+ _waitForDelete: true}));
+
+metadata = getMetadata(shard0);
+assert.eq(metadata.shardVersion.t, 0);
+assert.neq(metadata.collVersion.t, 0);
+assert.eq(metadata.pending[0][0]._id, 1);
+assert.eq(metadata.pending[0][1]._id, MaxKey);
+
+// The pending chunk should be promoted to a real chunk when shard0 reloads
+// its config.
+jsTest.log('Checking that pending chunk is promoted on reload...');
+
+assert.eq(null, coll.findOne({_id: 1}));
+
+metadata = getMetadata(shard0);
+assert.neq(metadata.shardVersion.t, 0);
+assert.neq(metadata.collVersion.t, 0);
+assert.eq(metadata.chunks[0][0]._id, 1);
+assert.eq(metadata.chunks[0][1]._id, MaxKey);
+
+st.printShardingStatus();
+
+jsTest.log('DONE!');
+
+st.stop();
+
+})()