author     Kaloian Manassiev <kaloian.manassiev@mongodb.com>   2016-03-30 17:25:51 -0400
committer  Kaloian Manassiev <kaloian.manassiev@mongodb.com>   2016-03-31 17:58:17 -0400
commit     3edc84475b10154a76f268edb5e80ac6ca609411 (patch)
tree       e05f42b49a69820a0d30a9fcdaf664a16e0cf3a8
parent     3ce338f6fc95322141bbf35f982513a831bb74ca (diff)
download   mongo-3edc84475b10154a76f268edb5e80ac6ca609411.tar.gz
SERVER-23425 Port 3.2 sharding move chunk unit tests
-rwxr-xr-x  buildscripts/smoke.py                                              13
-rw-r--r--  jstests/libs/chunk_manipulation_util.js                           214
-rw-r--r--  jstests/libs/cleanup_orphaned_util.js                             131
-rw-r--r--  jstests/sharding/cleanup_orphaned.js                               17
-rw-r--r--  jstests/sharding/cleanup_orphaned_auth.js                          51
-rw-r--r--  jstests/sharding/cleanup_orphaned_basic.js                        113
-rw-r--r--  jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js         148
-rw-r--r--  jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js  124
-rw-r--r--  jstests/sharding/cleanup_orphaned_cmd_hashed.js                   133
-rw-r--r--  jstests/sharding/cleanup_orphaned_cmd_prereload.js                121
-rw-r--r--  jstests/sharding/cleanup_orphaned_compound.js                      18
-rw-r--r--  jstests/sharding/migration_sets_fromMigrate_flag.js               167
-rw-r--r--  jstests/sharding/migration_with_source_ops.js                     146
-rw-r--r--  src/mongo/s/d_migrate.cpp                                          17
14 files changed, 1278 insertions, 135 deletions
diff --git a/buildscripts/smoke.py b/buildscripts/smoke.py
index 6d95b45d610..22eb828f3d3 100755
--- a/buildscripts/smoke.py
+++ b/buildscripts/smoke.py
@@ -469,14 +469,15 @@ def skipTest(path):
authTestsToSkip = [("jstests", "drop2.js"), # SERVER-8589,
("jstests", "killop.js"), # SERVER-10128
+ ("sharding", "cleanup_orphaned_cmd_during_movechunk.js"), # SERVER-21713
+ ("sharding", "cleanup_orphaned_cmd_during_movechunk_hashed.js"), # SERVER-21713
+ ("sharding", "copydb_from_mongos.js"), # SERVER-13080
+ ("sharding", "migration_ignore_interrupts.js"), # SERVER-21713
+ ("sharding", "migration_sets_fromMigrate_flag.js"), # SERVER-21713
+ ("sharding", "migration_with_source_ops.js"), # SERVER-21713
+ ("sharding", "parallel.js"),
("sharding", "sync3.js"), # SERVER-6388 for this and those below
("sharding", "sync6.js"),
- ("sharding", "parallel.js"),
- ("sharding", "copydb_from_mongos.js"), # SERVER-13080
- ("jstests", "bench_test1.js"),
- ("jstests", "bench_test2.js"),
- ("jstests", "bench_test3.js"),
- ("jstests", "bench_test_insert.js"),
("core", "bench_test1.js"),
("core", "bench_test2.js"),
("core", "bench_test3.js"),
diff --git a/jstests/libs/chunk_manipulation_util.js b/jstests/libs/chunk_manipulation_util.js
new file mode 100644
index 00000000000..a334cbe8aec
--- /dev/null
+++ b/jstests/libs/chunk_manipulation_util.js
@@ -0,0 +1,214 @@
+//
+// Utilities for testing chunk manipulation: moveChunk, mergeChunks, etc.
+//
+
+load('./jstests/libs/test_background_ops.js');
+
+//
+// Start a background moveChunk.
+// staticMongod: Server to use for communication, use
+// "MongoRunner.runMongod({})" to make one.
+// mongosURL: Like 'localhost:27017'.
+// findCriteria: Like { _id: 1 }, passed to moveChunk's "find" option.
+// bounds: Array of two documents that specify the lower and upper
+// shard key values of a chunk to move. Specify either the
+// bounds field or the find field but not both.
+// ns: Like 'dbName.collectionName'.
+// toShardId: Like 'shard0001'.
+//
+// Returns a join function; call it to wait for moveChunk to complete.
+//
+
+function moveChunkParallel(staticMongod, mongosURL, findCriteria, bounds, ns, toShardId) {
+ assert((findCriteria || bounds) && !(findCriteria && bounds),
+ 'Specify either findCriteria or bounds, but not both.');
+
+ function runMoveChunk(mongosURL, findCriteria, bounds, ns, toShardId) {
+ assert(mongosURL && ns && toShardId, 'Missing arguments.');
+ assert((findCriteria || bounds) && !(findCriteria && bounds),
+ 'Specify either findCriteria or bounds, but not both.');
+
+ var mongos = new Mongo(mongosURL), admin = mongos.getDB('admin'), cmd = {
+ moveChunk: ns
+ };
+
+ if (findCriteria) {
+ cmd.find = findCriteria;
+ } else {
+ cmd.bounds = bounds;
+ }
+
+ cmd.to = toShardId;
+ cmd._waitForDelete = true;
+
+ printjson(cmd);
+ var result = admin.runCommand(cmd);
+ printjson(result);
+ assert(result.ok);
+ }
+
+ // Return the join function.
+ return startParallelOps(
+ staticMongod, runMoveChunk, [mongosURL, findCriteria, bounds, ns, toShardId]);
+}
+
+// moveChunk starts at step 0 and proceeds to 1 (it has *finished* parsing
+// options), 2 (it has reloaded config and got distributed lock) and so on.
+var moveChunkStepNames = {
+ parsedOptions: 1,
+ gotDistLock: 2,
+ startedMoveChunk: 3, // called _recvChunkStart on recipient
+ reachedSteadyState: 4, // recipient reports state is "steady"
+ committed: 5,
+ done: 6
+};
+
+function numberToName(names, stepNumber) {
+ for (var name in names) {
+ if (names.hasOwnProperty(name) && names[name] == stepNumber) {
+ return name;
+ }
+ }
+
+ assert(false);
+}
+
+//
+// Configure a failpoint to make moveChunk hang at a step.
+//
+function pauseMoveChunkAtStep(shardConnection, stepNumber) {
+ configureMoveChunkFailPoint(shardConnection, stepNumber, 'alwaysOn');
+}
+
+//
+// Allow moveChunk to proceed past a step.
+//
+function unpauseMoveChunkAtStep(shardConnection, stepNumber) {
+ configureMoveChunkFailPoint(shardConnection, stepNumber, 'off');
+}
+
+function proceedToMoveChunkStep(shardConnection, stepNumber) {
+ jsTest.log('moveChunk proceeding from step "' +
+ numberToName(moveChunkStepNames, stepNumber - 1) + '" to "' +
+ numberToName(moveChunkStepNames, stepNumber) + '".');
+
+ pauseMoveChunkAtStep(shardConnection, stepNumber);
+ unpauseMoveChunkAtStep(shardConnection, stepNumber - 1);
+ waitForMoveChunkStep(shardConnection, stepNumber);
+}
+
+function configureMoveChunkFailPoint(shardConnection, stepNumber, mode) {
+    assert.between(moveChunkStepNames.parsedOptions,
+                   stepNumber,
+                   moveChunkStepNames.done,
+ "incorrect stepNumber",
+ true);
+ var admin = shardConnection.getDB('admin');
+ admin.runCommand({configureFailPoint: 'moveChunkHangAtStep' + stepNumber, mode: mode});
+}
+
+//
+// Wait for moveChunk to reach a step (1 through 6). Assumes only one moveChunk
+// is in the shard's currentOp.
+//
+function waitForMoveChunkStep(shardConnection, stepNumber) {
+ var searchString = 'step ' + stepNumber, admin = shardConnection.getDB('admin');
+
+    assert.between(moveChunkStepNames.parsedOptions,
+                   stepNumber,
+                   moveChunkStepNames.done,
+ "incorrect stepNumber",
+ true);
+
+ var msg = ('moveChunk on ' + shardConnection.shardName + ' never reached step "' +
+ numberToName(moveChunkStepNames, stepNumber) + '".');
+
+ assert.soon(function() {
+ var in_progress = admin.currentOp().inprog;
+ for (var i = 0; i < in_progress.length; ++i) {
+ var op = in_progress[i];
+ if (op.query && op.query.moveChunk) {
+ return op.msg && op.msg.startsWith(searchString);
+ }
+ }
+
+ return false;
+ }, msg);
+}
+
+var migrateStepNames = {
+ copiedIndexes: 1,
+ deletedPriorDataInRange: 2,
+ cloned: 3,
+ catchup: 4, // About to enter steady state.
+ steady: 5,
+ done: 6
+};
+
+//
+// Configure a failpoint to make the migration thread hang at a step (1 through 6).
+//
+function pauseMigrateAtStep(shardConnection, stepNumber) {
+ configureMigrateFailPoint(shardConnection, stepNumber, 'alwaysOn');
+}
+
+//
+// Allow _recvChunkStart to proceed past a step.
+//
+function unpauseMigrateAtStep(shardConnection, stepNumber) {
+ configureMigrateFailPoint(shardConnection, stepNumber, 'off');
+}
+
+function proceedToMigrateStep(shardConnection, stepNumber) {
+ jsTest.log('Migration thread proceeding from step "' +
+ numberToName(migrateStepNames, stepNumber - 1) + '" to "' +
+ numberToName(migrateStepNames, stepNumber) + '".');
+
+ pauseMigrateAtStep(shardConnection, stepNumber);
+ unpauseMigrateAtStep(shardConnection, stepNumber - 1);
+ waitForMigrateStep(shardConnection, stepNumber);
+}
+
+function configureMigrateFailPoint(shardConnection, stepNumber, mode) {
+ assert.between(migrateStepNames.copiedIndexes,
+ stepNumber,
+ migrateStepNames.done,
+ "incorrect stepNumber",
+ true);
+
+ var admin = shardConnection.getDB('admin');
+ admin.runCommand({configureFailPoint: 'migrateThreadHangAtStep' + stepNumber, mode: mode});
+}
+
+//
+// Wait for the migration thread to reach a step (1 through 6).
+//
+function waitForMigrateStep(shardConnection, stepNumber) {
+ var searchString = 'step ' + stepNumber, admin = shardConnection.getDB('admin');
+
+ assert.between(migrateStepNames.copiedIndexes,
+ stepNumber,
+ migrateStepNames.done,
+ "incorrect stepNumber",
+ true);
+
+ var msg = ('Migrate thread on ' + shardConnection.shardName + ' never reached step "' +
+ numberToName(migrateStepNames, stepNumber) + '".');
+
+ assert.soon(function() {
+        // verbose = true so that we see the migration thread.
+ var in_progress = admin.currentOp(true).inprog;
+ for (var i = 0; i < in_progress.length; ++i) {
+ var op = in_progress[i];
+ if (op.desc && op.desc === 'migrateThread') {
+ if (op.hasOwnProperty('msg')) {
+ return op.msg.startsWith(searchString);
+ } else {
+ return false;
+ }
+ }
+ }
+
+ return false;
+ }, msg);
+}
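Taken together, the helpers above implement a failpoint-driven step protocol: pause the donor's moveChunk and the recipient's migration thread at named steps, start moveChunk in a parallel shell, wait until both sides report the step, then release the failpoints and join. A minimal sketch of that composition follows; it is illustrative only and assumes a running ShardingTest named st with a collection 'foo.bar' sharded on _id and a chunk containing {_id: 0}.

load('./jstests/libs/chunk_manipulation_util.js');

var staticMongod = MongoRunner.runMongod({});  // communication channel for startParallelOps
var donor = st.shard0, recipient = st.shard1;

// Hold the donor once it has called _recvChunkStart and the recipient once it has cloned.
pauseMoveChunkAtStep(donor, moveChunkStepNames.startedMoveChunk);
pauseMigrateAtStep(recipient, migrateStepNames.cloned);

// Start the migration in the background and keep the join function.
var joinMoveChunk = moveChunkParallel(
    staticMongod, st.s0.host, {_id: 0}, null, 'foo.bar', st.shard1.shardName);

waitForMoveChunkStep(donor, moveChunkStepNames.startedMoveChunk);
waitForMigrateStep(recipient, migrateStepNames.cloned);

// ... interleave test operations (orphan inserts, cleanupOrphaned calls, etc.) here ...

// Release both sides and wait for the migration to finish.
unpauseMigrateAtStep(recipient, migrateStepNames.cloned);
unpauseMoveChunkAtStep(donor, moveChunkStepNames.startedMoveChunk);
joinMoveChunk();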
diff --git a/jstests/libs/cleanup_orphaned_util.js b/jstests/libs/cleanup_orphaned_util.js
new file mode 100644
index 00000000000..cfd69ab128f
--- /dev/null
+++ b/jstests/libs/cleanup_orphaned_util.js
@@ -0,0 +1,131 @@
+//
+// Utilities for testing cleanupOrphaned command.
+//
+
+//
+// Run cleanupOrphaned on a shard, and assert cleanupOrphaned runs the
+// expected number of times before stopping.
+//
+function cleanupOrphaned(shardConnection, ns, expectedIterations) {
+ var admin = shardConnection.getDB('admin'), result = admin.runCommand({cleanupOrphaned: ns}),
+ iterations = 1;
+
+ if (!result.ok) {
+ printjson(result);
+ }
+ assert(result.ok);
+ while (result.stoppedAtKey) {
+ result = admin.runCommand({cleanupOrphaned: ns, startingFromKey: result.stoppedAtKey});
+
+ assert(result.ok);
+ ++iterations;
+ }
+
+ assert.eq(iterations,
+ expectedIterations,
+ 'Expected to run ' +
+              'cleanupOrphaned ' + expectedIterations + ' times, but it only ran ' + iterations +
+ ' times before stoppedAtKey was null.');
+}
+
+// Shards data across the key range, then inserts orphan documents, runs cleanupOrphaned,
+// and makes sure that the orphans are removed.
+// Pass an options object like:
+// {
+// shardKey: { a: 1, b: 1 },
+// keyGen: function() { return [{ a: 'foo', b: 1 }, { a: 'bar', b: 2 }]; }
+// }
+function testCleanupOrphaned(options) {
+ var st = new ShardingTest({shards: 2, mongos: 2});
+
+ var mongos = st.s0, admin = mongos.getDB('admin'),
+ shards = mongos.getCollection('config.shards').find().toArray(),
+ coll = mongos.getCollection('foo.bar'),
+ shard0Coll = st.shard0.getCollection(coll.getFullName()), keys = options.keyGen(),
+ beginning = keys[0], oneQuarter = keys[Math.round(keys.length / 4)],
+ middle = keys[Math.round(keys.length / 2)],
+ threeQuarters = keys[Math.round(3 * keys.length / 4)];
+
+ assert.commandWorked(admin.runCommand({enableSharding: coll.getDB().getName()}));
+
+ printjson(admin.runCommand({movePrimary: coll.getDB() + "", to: shards[0]._id}));
+
+ assert.commandWorked(
+ admin.runCommand({shardCollection: coll.getFullName(), key: options.shardKey}));
+
+ st.printShardingStatus();
+
+ jsTest.log('Inserting some regular docs...');
+
+ assert.commandWorked(admin.runCommand({split: coll.getFullName(), middle: middle}));
+
+ assert.commandWorked(admin.runCommand(
+ {moveChunk: coll.getFullName(), find: middle, to: shards[1]._id, _waitForDelete: true}));
+
+ for (var i = 0; i < keys.length; i++)
+ coll.insert(keys[i]);
+ assert.eq(null, coll.getDB().getLastError());
+
+ // Half of the data is on each shard:
+ // shard 0: [beginning, middle)
+ // shard 1: [middle, end)
+ //
+ assert.eq(keys.length / 2, shard0Coll.count());
+ assert.eq(keys.length, coll.find().itcount());
+
+ jsTest.log('Inserting some orphaned docs...');
+
+ shard0Coll.insert(threeQuarters);
+
+ // I'll represent the orphan doc like {threeQuarters}, in this diagram:
+ //
+ // shard 0: [beginning, middle) {threeQuarters}
+ // shard 1: [middle, end)
+ assert.eq(null, shard0Coll.getDB().getLastError());
+ assert.eq(1 + keys.length / 2, shard0Coll.count());
+
+ jsTest.log('Cleaning up orphaned data...');
+
+ cleanupOrphaned(st.shard0, coll.getFullName(), 2);
+ assert.eq(keys.length / 2, shard0Coll.count());
+ assert.eq(keys.length, coll.find().itcount());
+
+ jsTest.log('Moving half the data out again (making a hole)...');
+
+ assert.commandWorked(admin.runCommand({split: coll.getFullName(), middle: oneQuarter}));
+
+ assert.commandWorked(admin.runCommand({
+ moveChunk: coll.getFullName(),
+ find: beginning,
+ to: shards[1]._id,
+ _waitForDelete: true
+ }));
+
+ // 1/4 of the data is on the first shard.
+ // shard 0: [threeQuarters, middle)
+ // shard 1: [beginning, threeQuarters) [middle, end)
+ assert.eq(Math.round(keys.length / 4), shard0Coll.count());
+ assert.eq(keys.length, coll.find().itcount());
+
+ jsTest.log('Inserting some more orphaned docs...');
+
+ shard0Coll.insert(beginning);
+ shard0Coll.insert(middle);
+ assert.eq(null, shard0Coll.getDB().getLastError());
+
+ // shard 0: {beginning} [threeQuarters, middle) {middle}
+ // shard 1: [beginning, threeQuarters) [middle, end)
+ assert.eq(2 + Math.round(keys.length / 4), shard0Coll.count());
+    assert.eq(keys.length, coll.find().itcount());
+
+ jsTest.log('Cleaning up more orphaned data...');
+
+ // Now cleanupOrphaned must iterate over 3 regions, not 2.
+ cleanupOrphaned(st.shard0, coll.getFullName(), 3);
+ assert.eq(Math.round(keys.length / 4), shard0Coll.count());
+ assert.eq(keys.length, coll.find().itcount());
+
+ jsTest.log('DONE!');
+
+ st.stop();
+}
diff --git a/jstests/sharding/cleanup_orphaned.js b/jstests/sharding/cleanup_orphaned.js
new file mode 100644
index 00000000000..a63991f7a23
--- /dev/null
+++ b/jstests/sharding/cleanup_orphaned.js
@@ -0,0 +1,17 @@
+//
+// Shards data across the key range, then inserts orphan documents, runs cleanupOrphaned,
+// and makes sure that the orphans are removed. Uses _id as the shard key.
+//
+
+load('./jstests/libs/cleanup_orphaned_util.js');
+
+testCleanupOrphaned({
+ shardKey: {_id: 1},
+ keyGen: function() {
+ var ids = [];
+ for (var i = -50; i < 50; i++) {
+ ids.push({_id: i});
+ }
+ return ids;
+ }
+});
diff --git a/jstests/sharding/cleanup_orphaned_auth.js b/jstests/sharding/cleanup_orphaned_auth.js
new file mode 100644
index 00000000000..4e9a3e0214c
--- /dev/null
+++ b/jstests/sharding/cleanup_orphaned_auth.js
@@ -0,0 +1,51 @@
+//
+// Tests of cleanupOrphaned command permissions.
+//
+
+(function() {
+ "use strict";
+
+ function assertUnauthorized(res, msg) {
+ if (assert._debug && msg)
+ print("in assert for: " + msg);
+
+ if (res.ok == 0 && res.errmsg.startsWith('not authorized'))
+ return;
+
+ var finalMsg = "command worked when it should have been unauthorized: " + tojson(res);
+ if (msg) {
+ finalMsg += " : " + msg;
+ }
+ doassert(finalMsg);
+ }
+
+ var st =
+ new ShardingTest({auth: true, keyFile: 'jstests/libs/key1', other: {useHostname: false}});
+
+ var mongosAdmin = st.s0.getDB('admin');
+ var coll = st.s0.getCollection('foo.bar');
+
+ mongosAdmin.createUser(
+ {user: 'admin', pwd: 'x', roles: ['clusterAdmin', 'userAdminAnyDatabase']});
+ mongosAdmin.auth('admin', 'x');
+
+ assert.commandWorked(mongosAdmin.runCommand({enableSharding: coll.getDB().getName()}));
+
+ assert.commandWorked(
+ mongosAdmin.runCommand({shardCollection: coll.getFullName(), key: {_id: 'hashed'}}));
+
+ // cleanupOrphaned requires auth as admin user.
+ var shardAdmin = st.shard0.getDB('admin');
+ assert.commandWorked(shardAdmin.logout());
+ assertUnauthorized(shardAdmin.runCommand({cleanupOrphaned: 'foo.bar'}));
+
+ var fooDB = st.shard0.getDB('foo');
+ shardAdmin.auth('admin', 'x');
+ fooDB.createUser({user: 'user', pwd: 'x', roles: ['readWrite', 'dbAdmin']});
+ shardAdmin.logout();
+ fooDB.auth('user', 'x');
+ assertUnauthorized(shardAdmin.runCommand({cleanupOrphaned: 'foo.bar'}));
+
+ st.stop();
+
+})();
diff --git a/jstests/sharding/cleanup_orphaned_basic.js b/jstests/sharding/cleanup_orphaned_basic.js
new file mode 100644
index 00000000000..e03e093fd82
--- /dev/null
+++ b/jstests/sharding/cleanup_orphaned_basic.js
@@ -0,0 +1,113 @@
+//
+// Basic tests of cleanupOrphaned. Validates that disallowed invocations of the
+// cleanupOrphaned command fail.
+//
+
+(function() {
+ "use strict";
+
+ /*****************************************************************************
+ * Unsharded mongod.
+ ****************************************************************************/
+
+ // cleanupOrphaned fails against unsharded mongod.
+ var mongod = MongoRunner.runMongod();
+ assert.commandFailed(mongod.getDB('admin').runCommand({cleanupOrphaned: 'foo.bar'}));
+
+ /*****************************************************************************
+ * Bad invocations of cleanupOrphaned command.
+ ****************************************************************************/
+
+ var st = new ShardingTest({other: {rs: true, rsOptions: {nodes: 2}}});
+
+ var mongos = st.s0;
+ var mongosAdmin = mongos.getDB('admin');
+ var dbName = 'foo';
+ var collectionName = 'bar';
+ var ns = dbName + '.' + collectionName;
+ var coll = mongos.getCollection(ns);
+
+ // cleanupOrphaned fails against mongos ('no such command'): it must be run
+ // on mongod.
+ assert.commandFailed(mongosAdmin.runCommand({cleanupOrphaned: ns}));
+
+ // cleanupOrphaned must be run on admin DB.
+ var shardFooDB = st.shard0.getDB(dbName);
+ assert.commandFailed(shardFooDB.runCommand({cleanupOrphaned: ns}));
+
+ // Must be run on primary.
+ var secondaryAdmin = st.rs0.getSecondary().getDB('admin');
+ var response = secondaryAdmin.runCommand({cleanupOrphaned: ns});
+ print('cleanupOrphaned on secondary:');
+ printjson(response);
+ assert.commandFailed(response);
+
+ var shardAdmin = st.shard0.getDB('admin');
+ var badNS = ' \\/."*<>:|?';
+ assert.commandFailed(shardAdmin.runCommand({cleanupOrphaned: badNS}));
+
+ // cleanupOrphaned works on sharded collection.
+ assert.commandWorked(mongosAdmin.runCommand({enableSharding: coll.getDB().getName()}));
+
+ st.ensurePrimaryShard(coll.getDB().getName(), st.shard0.shardName);
+
+ assert.commandWorked(mongosAdmin.runCommand({shardCollection: ns, key: {_id: 1}}));
+
+ assert.commandWorked(shardAdmin.runCommand({cleanupOrphaned: ns}));
+
+ /*****************************************************************************
+ * Empty shard.
+ ****************************************************************************/
+
+ // Ping shard[1] so it will be aware that it is sharded. Otherwise cleanupOrphaned
+ // may fail.
+ assert.commandWorked(mongosAdmin.runCommand(
+ {moveChunk: coll.getFullName(), find: {_id: 1}, to: st.shard1.shardName}));
+
+ assert.commandWorked(mongosAdmin.runCommand(
+ {moveChunk: coll.getFullName(), find: {_id: 1}, to: st.shard0.shardName}));
+
+ // Collection's home is shard0, there are no chunks assigned to shard1.
+ st.shard1.getCollection(ns).insert({});
+ assert.eq(null, st.shard1.getDB(dbName).getLastError());
+ assert.eq(1, st.shard1.getCollection(ns).count());
+ response = st.shard1.getDB('admin').runCommand({cleanupOrphaned: ns});
+ assert.commandWorked(response);
+ assert.eq({_id: {$maxKey: 1}}, response.stoppedAtKey);
+ assert.eq(0,
+ st.shard1.getCollection(ns).count(),
+ "cleanupOrphaned didn't delete orphan on empty shard.");
+
+ /*****************************************************************************
+ * Bad startingFromKeys.
+ ****************************************************************************/
+
+ // startingFromKey of MaxKey.
+ response = shardAdmin.runCommand({cleanupOrphaned: ns, startingFromKey: {_id: MaxKey}});
+ assert.commandWorked(response);
+ assert.eq(null, response.stoppedAtKey);
+
+ // startingFromKey doesn't match number of fields in shard key.
+ assert.commandFailed(shardAdmin.runCommand(
+ {cleanupOrphaned: ns, startingFromKey: {someKey: 'someValue', someOtherKey: 1}}));
+
+ // startingFromKey matches number of fields in shard key but not field names.
+ assert.commandFailed(
+ shardAdmin.runCommand({cleanupOrphaned: ns, startingFromKey: {someKey: 'someValue'}}));
+
+ var coll2 = mongos.getCollection('foo.baz');
+
+ assert.commandWorked(
+ mongosAdmin.runCommand({shardCollection: coll2.getFullName(), key: {a: 1, b: 1}}));
+
+ // startingFromKey doesn't match number of fields in shard key.
+ assert.commandFailed(shardAdmin.runCommand(
+ {cleanupOrphaned: coll2.getFullName(), startingFromKey: {someKey: 'someValue'}}));
+
+ // startingFromKey matches number of fields in shard key but not field names.
+ assert.commandFailed(shardAdmin.runCommand(
+ {cleanupOrphaned: coll2.getFullName(), startingFromKey: {a: 'someValue', c: 1}}));
+
+ st.stop();
+
+})();
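The startingFromKey/stoppedAtKey contract exercised above is the same resumable loop that cleanup_orphaned_util.js builds on: re-issue cleanupOrphaned with the returned stoppedAtKey until it comes back unset. A condensed sketch, assuming a direct connection shardConn to a shard primary and a sharded namespace ns (both hypothetical names):

var shardAdmin = shardConn.getDB('admin');
var res = shardAdmin.runCommand({cleanupOrphaned: ns});
assert.commandWorked(res);
while (res.stoppedAtKey) {
    // Resume scanning for orphaned ranges from where the previous call stopped.
    res = shardAdmin.runCommand({cleanupOrphaned: ns, startingFromKey: res.stoppedAtKey});
    assert.commandWorked(res);
}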
diff --git a/jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js b/jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js
new file mode 100644
index 00000000000..68cb7688981
--- /dev/null
+++ b/jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js
@@ -0,0 +1,148 @@
+//
+// Tests cleanupOrphaned concurrent with moveChunk.
+// Inserts orphan documents into the donor and recipient shards during the moveChunk and
+// verifies that cleanupOrphaned removes orphans.
+//
+
+load('./jstests/libs/chunk_manipulation_util.js');
+load('./jstests/libs/cleanup_orphaned_util.js');
+
+(function() {
+ "use strict";
+
+ var staticMongod = MongoRunner.runMongod({}); // For startParallelOps.
+ var st = new ShardingTest({shards: 2, other: {separateConfig: true}});
+
+ var mongos = st.s0, admin = mongos.getDB('admin'), dbName = 'foo', ns = dbName + '.bar',
+ coll = mongos.getCollection(ns), donor = st.shard0, recipient = st.shard1,
+ donorColl = donor.getCollection(ns), recipientColl = st.shard1.getCollection(ns);
+
+ // Three chunks of 10 documents each, with ids -20, -18, -16, ..., 38.
+ // Donor: [minKey, 0) [0, 20)
+ // Recipient: [20, maxKey)
+ assert.commandWorked(admin.runCommand({enableSharding: dbName}));
+ printjson(admin.runCommand({movePrimary: dbName, to: st.shard0.shardName}));
+ assert.commandWorked(admin.runCommand({shardCollection: ns, key: {_id: 1}}));
+ assert.commandWorked(admin.runCommand({split: ns, middle: {_id: 0}}));
+ assert.commandWorked(admin.runCommand({split: ns, middle: {_id: 20}}));
+ assert.commandWorked(admin.runCommand(
+ {moveChunk: ns, find: {_id: 20}, to: st.shard1.shardName, _waitForDelete: true}));
+
+    jsTest.log('Inserting 20 docs into shard 0....');
+ for (var i = -20; i < 20; i += 2)
+ coll.insert({_id: i});
+ assert.eq(null, coll.getDB().getLastError());
+ assert.eq(20, donorColl.count());
+
+    jsTest.log('Inserting 10 docs into shard 1....');
+ for (i = 20; i < 40; i += 2)
+ coll.insert({_id: i});
+ assert.eq(null, coll.getDB().getLastError());
+ assert.eq(10, recipientColl.count());
+
+ //
+ // Start a moveChunk in the background. Move chunk [0, 20), which has 10 docs,
+ // from shard 0 to shard 1. Pause it at some points in the donor's and
+ // recipient's work flows, and test cleanupOrphaned on shard 0 and shard 1.
+ //
+
+ jsTest.log('setting failpoint startedMoveChunk');
+ pauseMoveChunkAtStep(donor, moveChunkStepNames.startedMoveChunk);
+ pauseMigrateAtStep(recipient, migrateStepNames.cloned);
+ var joinMoveChunk = moveChunkParallel(
+ staticMongod, st.s0.host, {_id: 0}, null, coll.getFullName(), st.shard1.shardName);
+
+ waitForMoveChunkStep(donor, moveChunkStepNames.startedMoveChunk);
+ waitForMigrateStep(recipient, migrateStepNames.cloned);
+ // Recipient has run _recvChunkStart and begun its migration thread; docs have
+ // been cloned and chunk [0, 20) is noted as 'pending' on recipient.
+
+ // Donor: [minKey, 0) [0, 20)
+ // Recipient (pending): [0, 20)
+ // Recipient: [20, maxKey)
+
+ // Create orphans. I'll show an orphaned doc on donor with _id 26 like {26}:
+ //
+ // Donor: [minKey, 0) [0, 20) {26}
+ // Recipient (pending): [0, 20)
+ // Recipient: {-1} [20, maxKey)
+ donorColl.insert([{_id: 26}]);
+ assert.eq(null, donorColl.getDB().getLastError());
+ assert.eq(21, donorColl.count());
+ recipientColl.insert([{_id: -1}]);
+ assert.eq(null, recipientColl.getDB().getLastError());
+ assert.eq(21, recipientColl.count());
+
+ cleanupOrphaned(donor, ns, 2);
+ assert.eq(20, donorColl.count());
+ cleanupOrphaned(recipient, ns, 2);
+ assert.eq(20, recipientColl.count());
+
+ jsTest.log('Inserting document on donor side');
+ // Inserted a new document (not an orphan) with id 19, which belongs in the
+ // [0, 20) chunk.
+ donorColl.insert({_id: 19});
+ assert.eq(null, coll.getDB().getLastError());
+ assert.eq(21, donorColl.count());
+
+ // Recipient transfers this modification.
+ jsTest.log('Let migrate proceed to transferredMods');
+ pauseMigrateAtStep(recipient, migrateStepNames.catchup);
+ unpauseMigrateAtStep(recipient, migrateStepNames.cloned);
+ waitForMigrateStep(recipient, migrateStepNames.catchup);
+ jsTest.log('Done letting migrate proceed to transferredMods');
+
+ assert.eq(21, recipientColl.count(), "Recipient didn't transfer inserted document.");
+
+ cleanupOrphaned(donor, ns, 2);
+ assert.eq(21, donorColl.count());
+ cleanupOrphaned(recipient, ns, 2);
+ assert.eq(21, recipientColl.count());
+
+ // Create orphans.
+ donorColl.insert([{_id: 26}]);
+ assert.eq(null, donorColl.getDB().getLastError());
+ assert.eq(22, donorColl.count());
+ recipientColl.insert([{_id: -1}]);
+ assert.eq(null, recipientColl.getDB().getLastError());
+ assert.eq(22, recipientColl.count());
+
+ cleanupOrphaned(donor, ns, 2);
+ assert.eq(21, donorColl.count());
+ cleanupOrphaned(recipient, ns, 2);
+ assert.eq(21, recipientColl.count());
+
+ // Recipient has been waiting for donor to call _recvChunkCommit.
+ pauseMoveChunkAtStep(donor, moveChunkStepNames.committed);
+ unpauseMoveChunkAtStep(donor, moveChunkStepNames.startedMoveChunk);
+ proceedToMigrateStep(recipient, migrateStepNames.steady);
+ proceedToMigrateStep(recipient, migrateStepNames.done);
+
+ // Create orphans.
+ donorColl.insert([{_id: 26}]);
+ assert.eq(null, donorColl.getDB().getLastError());
+ assert.eq(22, donorColl.count());
+ recipientColl.insert([{_id: -1}]);
+ assert.eq(null, recipientColl.getDB().getLastError());
+ assert.eq(22, recipientColl.count());
+
+    // cleanupOrphaned removes the migrated chunk's data on the donor and the orphan on the recipient.
+ cleanupOrphaned(donor, ns, 2);
+ assert.eq(10, donorColl.count());
+ cleanupOrphaned(recipient, ns, 2);
+ assert.eq(21, recipientColl.count());
+
+ // Let migration thread complete.
+ unpauseMigrateAtStep(recipient, migrateStepNames.done);
+ unpauseMoveChunkAtStep(donor, moveChunkStepNames.committed);
+ joinMoveChunk();
+
+ // Donor has finished post-move delete.
+ cleanupOrphaned(donor, ns, 2); // this is necessary for the count to not be 11
+ assert.eq(10, donorColl.count());
+ assert.eq(21, recipientColl.count());
+ assert.eq(31, coll.count());
+
+ st.stop();
+
+})();
diff --git a/jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js b/jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js
new file mode 100644
index 00000000000..58ea9e806fd
--- /dev/null
+++ b/jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js
@@ -0,0 +1,124 @@
+//
+//
+// Tests cleanupOrphaned concurrent with moveChunk with a hashed shard key.
+// Inserts orphan documents into the donor and recipient shards during the moveChunk and
+// verifies that cleanupOrphaned removes orphans.
+//
+
+load('./jstests/libs/chunk_manipulation_util.js');
+load('./jstests/libs/cleanup_orphaned_util.js');
+
+(function() {
+ "use strict";
+
+ var staticMongod = MongoRunner.runMongod({}); // For startParallelOps.
+ var st = new ShardingTest({shards: 2, other: {separateConfig: true}});
+
+ var mongos = st.s0, admin = mongos.getDB('admin'),
+ shards = mongos.getCollection('config.shards').find().toArray(), dbName = 'foo',
+ ns = dbName + '.bar', coll = mongos.getCollection(ns);
+
+ assert.commandWorked(admin.runCommand({enableSharding: dbName}));
+ printjson(admin.runCommand({movePrimary: dbName, to: shards[0]._id}));
+ assert.commandWorked(admin.runCommand({shardCollection: ns, key: {key: 'hashed'}}));
+
+ // Makes four chunks by default, two on each shard.
+ var chunks = st.config.chunks.find().sort({min: 1}).toArray();
+ assert.eq(4, chunks.length);
+
+ var chunkWithDoc = chunks[1];
+ print('Trying to make doc that hashes to this chunk: ' + tojson(chunkWithDoc));
+
+ var found = false;
+ for (var i = 0; i < 10000; i++) {
+ var doc =
+ {
+ key: ObjectId()
+ },
+ hash = mongos.adminCommand({_hashBSONElement: doc.key}).out;
+
+ print('doc.key ' + doc.key + ' hashes to ' + hash);
+
+ if (mongos.getCollection('config.chunks')
+ .findOne(
+ {_id: chunkWithDoc._id, 'min.key': {$lte: hash}, 'max.key': {$gt: hash}})) {
+ found = true;
+ break;
+ }
+ }
+
+ assert(found, "Couldn't make doc that belongs to chunk 1.");
+ print('Doc: ' + tojson(doc));
+ coll.insert(doc);
+ assert.eq(null, coll.getDB().getLastError());
+
+ //
+ // Start a moveChunk in the background from shard 0 to shard 1. Pause it at
+ // some points in the donor's and recipient's work flows, and test
+ // cleanupOrphaned.
+ //
+
+ var donor, recip;
+ if (chunkWithDoc.shard == st.shard0.shardName) {
+ donor = st.shard0;
+ recip = st.shard1;
+ } else {
+ recip = st.shard0;
+ donor = st.shard1;
+ }
+
+ jsTest.log('setting failpoint startedMoveChunk');
+ pauseMoveChunkAtStep(donor, moveChunkStepNames.startedMoveChunk);
+ pauseMigrateAtStep(recip, migrateStepNames.cloned);
+
+ var joinMoveChunk = moveChunkParallel(staticMongod,
+ st.s0.host,
+ null,
+ [chunkWithDoc.min, chunkWithDoc.max], // bounds
+ coll.getFullName(),
+ recip.shardName);
+
+ waitForMoveChunkStep(donor, moveChunkStepNames.startedMoveChunk);
+ waitForMigrateStep(recip, migrateStepNames.cloned);
+ proceedToMigrateStep(recip, migrateStepNames.catchup);
+ // recipient has run _recvChunkStart and begun its migration thread;
+ // 'doc' has been cloned and chunkWithDoc is noted as 'pending' on recipient.
+
+ var donorColl = donor.getCollection(ns), recipColl = recip.getCollection(ns);
+
+ assert.eq(1, donorColl.count());
+ assert.eq(1, recipColl.count());
+
+ // cleanupOrphaned should go through two iterations, since the default chunk
+ // setup leaves two unowned ranges on each shard.
+ cleanupOrphaned(donor, ns, 2);
+ cleanupOrphaned(recip, ns, 2);
+ assert.eq(1, donorColl.count());
+ assert.eq(1, recipColl.count());
+
+ // recip has been waiting for donor to call _recvChunkCommit.
+ pauseMoveChunkAtStep(donor, moveChunkStepNames.committed);
+ unpauseMoveChunkAtStep(donor, moveChunkStepNames.startedMoveChunk);
+ proceedToMigrateStep(recip, migrateStepNames.steady);
+ proceedToMigrateStep(recip, migrateStepNames.done);
+
+ // cleanupOrphaned removes migrated data from donor. The donor would
+ // otherwise clean them up itself, in the post-move delete phase.
+ cleanupOrphaned(donor, ns, 2);
+ assert.eq(0, donorColl.count());
+ cleanupOrphaned(recip, ns, 2);
+ assert.eq(1, recipColl.count());
+
+ // Let migration thread complete.
+ unpauseMigrateAtStep(recip, migrateStepNames.done);
+ unpauseMoveChunkAtStep(donor, moveChunkStepNames.committed);
+ joinMoveChunk();
+
+ // donor has finished post-move delete.
+ assert.eq(0, donorColl.count());
+ assert.eq(1, recipColl.count());
+ assert.eq(1, coll.count());
+
+ st.stop();
+
+})();
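The hashed-key test above leans on one trick worth noting: use the _hashBSONElement command to compute the hashed shard key value of a candidate document, then check config.chunks to see whether that hash falls inside the target chunk's [min, max) range. A sketch of just that lookup, assuming a mongos connection named mongos and a chunk document named chunk taken from config.chunks for a {key: 'hashed'} collection:

// Generate candidate keys until one hashes into the target chunk's range.
var doc = null;
for (var i = 0; i < 10000; i++) {
    var candidate = {key: ObjectId()};
    var hash = mongos.adminCommand({_hashBSONElement: candidate.key}).out;
    if (mongos.getCollection('config.chunks').findOne(
            {_id: chunk._id, 'min.key': {$lte: hash}, 'max.key': {$gt: hash}})) {
        doc = candidate;
        break;
    }
}
assert.neq(null, doc, "Couldn't generate a key that hashes into the target chunk.");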
diff --git a/jstests/sharding/cleanup_orphaned_cmd_hashed.js b/jstests/sharding/cleanup_orphaned_cmd_hashed.js
index 52cc1657c1d..a4f9cfb25eb 100644
--- a/jstests/sharding/cleanup_orphaned_cmd_hashed.js
+++ b/jstests/sharding/cleanup_orphaned_cmd_hashed.js
@@ -1,71 +1,78 @@
//
// Tests cleanup of orphaned data in hashed sharded coll via the orphaned data cleanup command
-// @tags : [ hashed ]
//
-var options = { separateConfig : true, shardOptions : { verbose : 2 } };
-
-var st = new ShardingTest({ shards : 2, mongos : 1, other : options });
-st.stopBalancer();
-
-var mongos = st.s0;
-var admin = mongos.getDB( "admin" );
-var shards = mongos.getCollection( "config.shards" ).find().toArray();
-var coll = mongos.getCollection( "foo.bar" );
-
-assert( admin.runCommand({ enableSharding : coll.getDB() + "" }).ok );
-printjson( admin.runCommand({ movePrimary : coll.getDB() + "", to : shards[0]._id }) );
-assert( admin.runCommand({ shardCollection : coll + "", key : { _id : "hashed" } }).ok );
-
-// Create two orphaned data holes, one bounded by min or max on each shard
-
-assert( admin.runCommand({ split : coll + "", middle : { _id : NumberLong(-100) } }).ok );
-assert( admin.runCommand({ split : coll + "", middle : { _id : NumberLong(-50) } }).ok );
-assert( admin.runCommand({ split : coll + "", middle : { _id : NumberLong(50) } }).ok );
-assert( admin.runCommand({ split : coll + "", middle : { _id : NumberLong(100) } }).ok );
-assert( admin.runCommand({ moveChunk : coll + "", bounds : [{ _id : NumberLong(-100) },
- { _id : NumberLong(-50) }],
- to : shards[1]._id,
- _waitForDelete : true }).ok );
-assert( admin.runCommand({ moveChunk : coll + "", bounds : [{ _id : NumberLong(50) },
- { _id : NumberLong(100) }],
- to : shards[0]._id,
- _waitForDelete : true }).ok );
-st.printShardingStatus();
-
-jsTest.log( "Inserting some docs on each shard, so 1/2 will be orphaned..." );
-
-for ( var s = 0; s < 2; s++ ) {
- var shardColl = ( s == 0 ? st.shard0 : st.shard1 ).getCollection( coll + "" );
- var bulk = shardColl.initializeUnorderedBulkOp();
- for ( var i = 0; i < 100; i++ ) bulk.insert({ _id : i });
- assert.writeOK(bulk.execute());
-}
-
-assert.eq( 200, st.shard0.getCollection( coll + "" ).find().itcount() +
- st.shard1.getCollection( coll + "" ).find().itcount() );
-assert.eq( 100, coll.find().itcount() );
-
-jsTest.log( "Cleaning up orphaned data in hashed coll..." );
-
-for ( var s = 0; s < 2; s++ ) {
- var shardAdmin = ( s == 0 ? st.shard0 : st.shard1 ).getDB( "admin" );
-
- var result = shardAdmin.runCommand({ cleanupOrphaned : coll + "" });
- while ( result.ok && result.stoppedAtKey ) {
- printjson( result );
- result = shardAdmin.runCommand({ cleanupOrphaned : coll + "",
- startingFromKey : result.stoppedAtKey });
+(function() {
+ "use strict";
+
+ var st = new ShardingTest({shards: 2, mongos: 1});
+
+ var mongos = st.s0;
+ var admin = mongos.getDB("admin");
+ var coll = mongos.getCollection("foo.bar");
+
+ assert.commandWorked(admin.runCommand({enableSharding: coll.getDB() + ""}));
+ printjson(admin.runCommand({movePrimary: coll.getDB() + "", to: st.shard0.shardName}));
+ assert.commandWorked(admin.runCommand({shardCollection: coll + "", key: {_id: "hashed"}}));
+
+ // Create two orphaned data holes, one bounded by min or max on each shard
+
+ assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: NumberLong(-100)}}));
+ assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: NumberLong(-50)}}));
+ assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: NumberLong(50)}}));
+ assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: NumberLong(100)}}));
+ assert.commandWorked(admin.runCommand({
+ moveChunk: coll + "",
+ bounds: [{_id: NumberLong(-100)}, {_id: NumberLong(-50)}],
+ to: st.shard1.shardName,
+ _waitForDelete: true
+ }));
+ assert.commandWorked(admin.runCommand({
+ moveChunk: coll + "",
+ bounds: [{_id: NumberLong(50)}, {_id: NumberLong(100)}],
+ to: st.shard0.shardName,
+ _waitForDelete: true
+ }));
+ st.printShardingStatus();
+
+ jsTest.log("Inserting some docs on each shard, so 1/2 will be orphaned...");
+
+ for (var s = 0; s < 2; s++) {
+ var shardColl = (s == 0 ? st.shard0 : st.shard1).getCollection(coll + "");
+ var bulk = shardColl.initializeUnorderedBulkOp();
+ for (var i = 0; i < 100; i++)
+ bulk.insert({_id: i});
+ assert.writeOK(bulk.execute());
}
-
- printjson( result );
- assert( result.ok );
-}
-assert.eq( 100, st.shard0.getCollection( coll + "" ).find().itcount() +
- st.shard1.getCollection( coll + "" ).find().itcount() );
-assert.eq( 100, coll.find().itcount() );
+ assert.eq(200,
+ st.shard0.getCollection(coll + "").find().itcount() +
+ st.shard1.getCollection(coll + "").find().itcount());
+ assert.eq(100, coll.find().itcount());
+
+ jsTest.log("Cleaning up orphaned data in hashed coll...");
+
+ for (var s = 0; s < 2; s++) {
+ var shardAdmin = (s == 0 ? st.shard0 : st.shard1).getDB("admin");
+
+ var result = shardAdmin.runCommand({cleanupOrphaned: coll + ""});
+ while (result.ok && result.stoppedAtKey) {
+ printjson(result);
+ result = shardAdmin.runCommand(
+ {cleanupOrphaned: coll + "", startingFromKey: result.stoppedAtKey});
+ }
+
+ printjson(result);
+ assert(result.ok);
+ }
+
+ assert.eq(100,
+ st.shard0.getCollection(coll + "").find().itcount() +
+ st.shard1.getCollection(coll + "").find().itcount());
+ assert.eq(100, coll.find().itcount());
+
+ jsTest.log("DONE!");
-jsTest.log( "DONE!" );
+ st.stop();
-st.stop();
+})();
diff --git a/jstests/sharding/cleanup_orphaned_cmd_prereload.js b/jstests/sharding/cleanup_orphaned_cmd_prereload.js
index 7c3170268f3..7155baea970 100644
--- a/jstests/sharding/cleanup_orphaned_cmd_prereload.js
+++ b/jstests/sharding/cleanup_orphaned_cmd_prereload.js
@@ -2,97 +2,90 @@
// Tests failed cleanup of orphaned data when we have pending chunks
//
-var options = { separateConfig : true, shardOptions : { verbose : 2 } };
-
-var st = new ShardingTest({ shards : 2, mongos : 2, other : options });
-st.stopBalancer();
+var st = new ShardingTest({shards: 2});
var mongos = st.s0;
-var admin = mongos.getDB( "admin" );
-var shards = mongos.getCollection( "config.shards" ).find().toArray();
-var coll = mongos.getCollection( "foo.bar" );
+var admin = mongos.getDB("admin");
+var shards = mongos.getCollection("config.shards").find().toArray();
+var coll = mongos.getCollection("foo.bar");
-assert( admin.runCommand({ enableSharding : coll.getDB() + "" }).ok );
-printjson( admin.runCommand({ movePrimary : coll.getDB() + "", to : shards[0]._id }) );
-assert( admin.runCommand({ shardCollection : coll + "", key : { _id : 1 } }).ok );
+assert(admin.runCommand({enableSharding: coll.getDB() + ""}).ok);
+printjson(admin.runCommand({movePrimary: coll.getDB() + "", to: shards[0]._id}));
+assert(admin.runCommand({shardCollection: coll + "", key: {_id: 1}}).ok);
-jsTest.log( "Moving some chunks to shard1..." );
+jsTest.log("Moving some chunks to shard1...");
-assert( admin.runCommand({ split : coll + "", middle : { _id : 0 } }).ok );
-assert( admin.runCommand({ split : coll + "", middle : { _id : 1 } }).ok );
+assert(admin.runCommand({split: coll + "", middle: {_id: 0}}).ok);
+assert(admin.runCommand({split: coll + "", middle: {_id: 1}}).ok);
-assert( admin.runCommand({ moveChunk : coll + "",
- find : { _id : 0 },
- to : shards[1]._id,
- _waitForDelete : true }).ok );
-assert( admin.runCommand({ moveChunk : coll + "",
- find : { _id : 1 },
- to : shards[1]._id,
- _waitForDelete : true }).ok );
+assert(admin.runCommand(
+ {moveChunk: coll + "", find: {_id: 0}, to: shards[1]._id, _waitForDelete: true})
+ .ok);
+assert(admin.runCommand(
+ {moveChunk: coll + "", find: {_id: 1}, to: shards[1]._id, _waitForDelete: true})
+ .ok);
-var metadata = st.shard1.getDB( "admin" )
- .runCommand({ getShardVersion : coll + "", fullMetadata : true }).metadata;
+var metadata =
+ st.shard1.getDB("admin").runCommand({getShardVersion: coll + "", fullMetadata: true}).metadata;
-printjson( metadata );
+printjson(metadata);
-assert.eq( metadata.pending[0][0]._id, 1 );
-assert.eq( metadata.pending[0][1]._id, MaxKey );
+assert.eq(metadata.pending[0][0]._id, 1);
+assert.eq(metadata.pending[0][1]._id, MaxKey);
-jsTest.log( "Ensuring we won't remove orphaned data in pending chunk..." );
+jsTest.log("Ensuring we won't remove orphaned data in pending chunk...");
-assert( !st.shard1.getDB( "admin" )
- .runCommand({ cleanupOrphaned : coll + "", startingFromKey : { _id : 1 } }).stoppedAtKey );
+assert(!st.shard1.getDB("admin")
+ .runCommand({cleanupOrphaned: coll + "", startingFromKey: {_id: 1}})
+ .stoppedAtKey);
-jsTest.log( "Moving some chunks back to shard0 after empty..." );
+jsTest.log("Moving some chunks back to shard0 after empty...");
-admin.runCommand({ moveChunk : coll + "",
- find : { _id : -1 },
- to : shards[1]._id,
- _waitForDelete : true });
+admin.runCommand({moveChunk: coll + "", find: {_id: -1}, to: shards[1]._id, _waitForDelete: true});
-var metadata = st.shard0.getDB( "admin" )
- .runCommand({ getShardVersion : coll + "", fullMetadata : true }).metadata;
+var metadata =
+ st.shard0.getDB("admin").runCommand({getShardVersion: coll + "", fullMetadata: true}).metadata;
-printjson( metadata );
+printjson(metadata);
-assert.eq( metadata.shardVersion.t, 0 );
-assert.neq( metadata.collVersion.t, 0 );
-assert.eq( metadata.pending.length, 0 );
+assert.eq(metadata.shardVersion.t, 0);
+assert.neq(metadata.collVersion.t, 0);
+assert.eq(metadata.pending.length, 0);
-assert( admin.runCommand({ moveChunk : coll + "",
- find : { _id : 1 },
- to : shards[0]._id,
- _waitForDelete : true }).ok );
+assert(admin.runCommand(
+ {moveChunk: coll + "", find: {_id: 1}, to: shards[0]._id, _waitForDelete: true})
+ .ok);
-var metadata = st.shard0.getDB( "admin" )
- .runCommand({ getShardVersion : coll + "", fullMetadata : true }).metadata;
+var metadata =
+ st.shard0.getDB("admin").runCommand({getShardVersion: coll + "", fullMetadata: true}).metadata;
-printjson( metadata );
-assert.eq( metadata.shardVersion.t, 0 );
-assert.neq( metadata.collVersion.t, 0 );
-assert.eq( metadata.pending[0][0]._id, 1 );
-assert.eq( metadata.pending[0][1]._id, MaxKey );
+printjson(metadata);
+assert.eq(metadata.shardVersion.t, 0);
+assert.neq(metadata.collVersion.t, 0);
+assert.eq(metadata.pending[0][0]._id, 1);
+assert.eq(metadata.pending[0][1]._id, MaxKey);
-jsTest.log( "Ensuring again we won't remove orphaned data in pending chunk..." );
+jsTest.log("Ensuring again we won't remove orphaned data in pending chunk...");
-assert( !st.shard0.getDB( "admin" )
- .runCommand({ cleanupOrphaned : coll + "", startingFromKey : { _id : 1 } }).stoppedAtKey );
+assert(!st.shard0.getDB("admin")
+ .runCommand({cleanupOrphaned: coll + "", startingFromKey: {_id: 1}})
+ .stoppedAtKey);
-jsTest.log( "Checking that pending chunk is promoted on reload..." );
+jsTest.log("Checking that pending chunk is promoted on reload...");
-assert.eq( null, coll.findOne({ _id : 1 }) );
+assert.eq(null, coll.findOne({_id: 1}));
-var metadata = st.shard0.getDB( "admin" )
- .runCommand({ getShardVersion : coll + "", fullMetadata : true }).metadata;
+var metadata =
+ st.shard0.getDB("admin").runCommand({getShardVersion: coll + "", fullMetadata: true}).metadata;
-printjson( metadata );
-assert.neq( metadata.shardVersion.t, 0 );
-assert.neq( metadata.collVersion.t, 0 );
-assert.eq( metadata.chunks[0][0]._id, 1 );
-assert.eq( metadata.chunks[0][1]._id, MaxKey );
+printjson(metadata);
+assert.neq(metadata.shardVersion.t, 0);
+assert.neq(metadata.collVersion.t, 0);
+assert.eq(metadata.chunks[0][0]._id, 1);
+assert.eq(metadata.chunks[0][1]._id, MaxKey);
st.printShardingStatus();
-jsTest.log( "DONE!" );
+jsTest.log("DONE!");
st.stop();
diff --git a/jstests/sharding/cleanup_orphaned_compound.js b/jstests/sharding/cleanup_orphaned_compound.js
new file mode 100644
index 00000000000..ebf7163c77d
--- /dev/null
+++ b/jstests/sharding/cleanup_orphaned_compound.js
@@ -0,0 +1,18 @@
+//
+// Shards data across the key range, then inserts orphan documents, runs cleanupOrphaned,
+// and makes sure that the orphans are removed. Uses a compound shard key.
+//
+
+load('./jstests/libs/cleanup_orphaned_util.js');
+
+testCleanupOrphaned({
+ shardKey: {a: 1, b: 1},
+ keyGen: function() {
+ var ids = [];
+ for (var i = -50; i < 50; i++) {
+ ids.push({a: i, b: Math.random()});
+ }
+
+ return ids;
+ }
+});
diff --git a/jstests/sharding/migration_sets_fromMigrate_flag.js b/jstests/sharding/migration_sets_fromMigrate_flag.js
new file mode 100644
index 00000000000..55dbca8b5fa
--- /dev/null
+++ b/jstests/sharding/migration_sets_fromMigrate_flag.js
@@ -0,0 +1,167 @@
+//
+// Tests whether the fromMigrate flag is correctly set during migrations.
+//
+// Tests:
+// #1 (delete op) fromMigrate is set when recipient shard deletes all documents locally
+// in the chunk range it is about to receive from the donor shard.
+// #2 (delete op) fromMigrate is set when the donor shard deletes documents that have
+// been migrated to another shard.
+// #3 (insert op) fromMigrate is set when the recipient shard receives chunk migration
+// data and inserts it.
+// #4 (update op) fromMigrate is set when an update occurs in the donor shard during
+// migration and is sent to the recipient via the transfer logs.
+// #5 fromMigrate is NOT set on donor shard and IS set on the recipient shard when real
+// delete op is done during chunk migration within the chunk range.
+//
+
+load('./jstests/libs/chunk_manipulation_util.js');
+
+(function() {
+ "use strict";
+
+ var staticMongod = MongoRunner.runMongod({}); // For startParallelOps.
+
+ /**
+     * Start up a new sharded cluster and stop the balancer, which would interfere with manual chunk management.
+ */
+
+ var st = new ShardingTest({shards: 2, mongos: 1, rs: {nodes: 3}});
+ st.stopBalancer();
+
+ var mongos = st.s0, admin = mongos.getDB('admin'),
+ shards = mongos.getCollection('config.shards').find().toArray(), dbName = "testDB",
+ ns = dbName + ".foo", coll = mongos.getCollection(ns), donor = st.shard0,
+ recipient = st.shard1, donorColl = donor.getCollection(ns),
+ recipientColl = recipient.getCollection(ns), donorLocal = donor.getDB('local'),
+ recipientLocal = recipient.getDB('local');
+
+ // Two chunks
+ // Donor: [0, 2) [2, 5)
+ // Recipient:
+ jsTest.log('Enable sharding of the collection and pre-split into two chunks....');
+
+ assert.commandWorked(admin.runCommand({enableSharding: dbName}));
+ st.ensurePrimaryShard(dbName, shards[0]._id);
+ assert.commandWorked(donorColl.createIndex({_id: 1}));
+ assert.commandWorked(admin.runCommand({shardCollection: ns, key: {_id: 1}}));
+ assert.commandWorked(admin.runCommand({split: ns, middle: {_id: 2}}));
+
+ // 6 documents,
+ // donor: 2 in the first chunk, 3 in the second.
+ // recipient: 1 document (shardkey overlaps with a doc in second chunk of donor shard)
+ jsTest.log('Inserting 5 docs into donor shard, 1 doc into the recipient shard....');
+
+ for (var i = 0; i < 5; ++i)
+ assert.writeOK(coll.insert({_id: i}));
+ assert.eq(5, donorColl.count());
+
+ for (var i = 2; i < 3; ++i)
+ assert.writeOK(recipientColl.insert({_id: i}));
+ assert.eq(1, recipientColl.count());
+
+ /**
+ * Set failpoint: recipient will pause migration after cloning chunk data from donor,
+ * before checking transfer mods log on donor.
+ */
+
+ jsTest.log('setting recipient failpoint cloned');
+ pauseMigrateAtStep(recipient, migrateStepNames.cloned);
+
+ /**
+ * Start moving chunk [2, 5) from donor shard to recipient shard, run in the background.
+ */
+
+ // Donor: [0, 2)
+ // Recipient: [2, 5)
+ jsTest.log('Starting chunk migration, pause after cloning...');
+
+ var joinMoveChunk = moveChunkParallel(
+ staticMongod, st.s0.host, {_id: 2}, null, coll.getFullName(), shards[1]._id);
+
+ /**
+ * Wait for recipient to finish cloning.
+ * THEN update 1 document {_id: 3} on donor within the currently migrating chunk.
+ * AND delete 1 document {_id: 4} on donor within the currently migrating chunk.
+ */
+
+ waitForMigrateStep(recipient, migrateStepNames.cloned);
+
+ jsTest.log('Update 1 doc and delete 1 doc on donor within the currently migrating chunk...');
+
+ assert.writeOK(coll.update({_id: 3}, {_id: 3, a: "updated doc"}));
+ assert.writeOK(coll.remove({_id: 4}));
+
+ /**
+ * Finish migration. Unpause recipient migration, wait for it to collect
+ * the transfer mods log from donor and finish migration.
+ */
+
+ jsTest.log('Continuing and finishing migration...');
+ unpauseMigrateAtStep(recipient, migrateStepNames.cloned);
+ joinMoveChunk();
+
+ /**
+ * Check documents are where they should be: 2 docs in donor chunk, 2 docs in recipient chunk
+ * (because third doc in recipient shard's chunk got deleted on the donor shard during
+ * migration).
+ */
+
+ jsTest.log('Checking that documents are on the shards they should be...');
+
+ assert.eq(2, recipientColl.count(), "Recipient shard doesn't have exactly 2 documents!");
+ assert.eq(2, donorColl.count(), "Donor shard doesn't have exactly 2 documents!");
+ assert.eq(4, coll.count(), "Collection total is not 4!");
+
+ /**
+     * Check that the fromMigrate flag has been set correctly in donor and recipient oplogs.
+ */
+
+ jsTest.log('Checking donor and recipient oplogs for correct fromMigrate flags...');
+
+ var donorOplogRes = donorLocal.oplog.rs.find({op: 'd', fromMigrate: true, 'o._id': 2}).count();
+ assert.eq(1,
+ donorOplogRes,
+ "fromMigrate flag wasn't set on the donor shard's oplog for " +
+ "migrating delete op on {_id: 2}! Test #2 failed.");
+
+ donorOplogRes =
+ donorLocal.oplog.rs.find({op: 'd', fromMigrate: {$exists: false}, 'o._id': 4}).count();
+ assert.eq(1,
+ donorOplogRes,
+ "Real delete of {_id: 4} on donor shard incorrectly set the " +
+ "fromMigrate flag in the oplog! Test #5 failed.");
+
+ var recipientOplogRes =
+ recipientLocal.oplog.rs.find({op: 'i', fromMigrate: true, 'o._id': 2}).count();
+ assert.eq(1,
+ recipientOplogRes,
+ "fromMigrate flag wasn't set on the recipient shard's " +
+ "oplog for migrating insert op on {_id: 2}! Test #3 failed.");
+
+ recipientOplogRes =
+ recipientLocal.oplog.rs.find({op: 'd', fromMigrate: true, 'o._id': 2}).count();
+ assert.eq(1,
+ recipientOplogRes,
+ "fromMigrate flag wasn't set on the recipient shard's " +
+ "oplog for delete op on the old {_id: 2} that overlapped " +
+ "with the chunk about to be copied! Test #1 failed.");
+
+ recipientOplogRes =
+ recipientLocal.oplog.rs.find({op: 'u', fromMigrate: true, 'o._id': 3}).count();
+ assert.eq(1,
+ recipientOplogRes,
+ "fromMigrate flag wasn't set on the recipient shard's " +
+ "oplog for update op on {_id: 3}! Test #4 failed.");
+
+ recipientOplogRes =
+ recipientLocal.oplog.rs.find({op: 'd', fromMigrate: true, 'o._id': 4}).count();
+ assert.eq(1,
+ recipientOplogRes,
+ "fromMigrate flag wasn't set on the recipient shard's " +
+ "oplog for delete op on {_id: 4} that occurred during " +
+ "migration! Test #5 failed.");
+
+ jsTest.log('DONE!');
+ st.stop();
+
+})();
diff --git a/jstests/sharding/migration_with_source_ops.js b/jstests/sharding/migration_with_source_ops.js
new file mode 100644
index 00000000000..31b6fff75e9
--- /dev/null
+++ b/jstests/sharding/migration_with_source_ops.js
@@ -0,0 +1,146 @@
+//
+// Tests that, during a chunk migration, the recipient does not receive out-of-range operations
+// from the donor.
+//
+// Pauses the migration on the recipient shard after the initial data chunk cloning is finished.
+// This allows time for the donor shard to perform inserts/deletes/updates, half of which are on
+// the migrating chunk. The recipient is then set to continue, collecting the mods from the
+// donor's transfer mods log, and finishes the migration. A failpoint is set prior to resuming
+// in the recipient shard to fail if it receives an out of chunk range insert/delete/update from
+// the donor's transfer mods log.
+//
+// The idea is that the recipient shard should not collect inserts/deletes/updates from the
+// donor shard's transfer mods log that are out of range, since doing so would unnecessarily
+// delay the migration: the migration can only finish once the donor's log of mods for the
+// migrating chunk is empty.
+//
+
+load('./jstests/libs/chunk_manipulation_util.js');
+
+(function() {
+ "use strict";
+
+ var staticMongod = MongoRunner.runMongod({}); // For startParallelOps.
+
+ /**
+     * Start up a new sharded cluster and stop the balancer, which would interfere with manual chunk management.
+ */
+
+ var st = new ShardingTest({shards: 2, mongos: 1});
+ st.stopBalancer();
+
+ var mongos = st.s0, admin = mongos.getDB('admin'),
+ shards = mongos.getCollection('config.shards').find().toArray(), dbName = "testDB",
+ ns = dbName + ".foo", coll = mongos.getCollection(ns), donor = st.shard0,
+ recipient = st.shard1, donorColl = donor.getCollection(ns),
+ recipientColl = recipient.getCollection(ns);
+
+ /**
+     * Enable sharding, and split the collection into two chunks.
+ */
+
+ // Two chunks
+ // Donor: [0, 20) [20, 40)
+ // Recipient:
+ jsTest.log('Enabling sharding of the collection and pre-splitting into two chunks....');
+ assert.commandWorked(admin.runCommand({enableSharding: dbName}));
+ st.ensurePrimaryShard(dbName, shards[0]._id);
+ assert.commandWorked(admin.runCommand({shardCollection: ns, key: {a: 1}}));
+ assert.commandWorked(admin.runCommand({split: ns, middle: {a: 20}}));
+
+ /**
+ * Insert data into collection
+ */
+
+ // 10 documents in each chunk on the donor
+ jsTest.log('Inserting 20 docs into donor shard, 10 in each chunk....');
+ for (var i = 0; i < 10; ++i)
+ assert.writeOK(coll.insert({a: i}));
+ for (var i = 20; i < 30; ++i)
+ assert.writeOK(coll.insert({a: i}));
+ assert.eq(20, coll.count());
+
+ /**
+ * Set failpoints. Recipient will crash if an out of chunk range data op is
+ * received from donor. Recipient will pause migration after cloning chunk data from donor,
+ * before checking transfer mods log on donor.
+ */
+
+ jsTest.log('Setting failpoint failMigrationReceivedOutOfRangeOperation');
+ assert.commandWorked(recipient.getDB('admin').runCommand(
+ {configureFailPoint: 'failMigrationReceivedOutOfRangeOperation', mode: 'alwaysOn'}));
+
+ jsTest.log(
+ 'Setting chunk migration recipient failpoint so that it pauses after bulk clone step');
+ pauseMigrateAtStep(recipient, migrateStepNames.cloned);
+
+ /**
+ * Start a moveChunk in the background. Move chunk [20, 40), which has 10 docs in the
+ * range, from shard 0 (donor) to shard 1 (recipient). Migration will pause after
+ * cloning step (when it reaches the recipient failpoint).
+ */
+
+ // Donor: [0, 20)
+ // Recipient: [20, 40)
+ jsTest.log('Starting migration, pause after cloning...');
+ var joinMoveChunk = moveChunkParallel(
+ staticMongod, st.s0.host, {a: 20}, null, coll.getFullName(), shards[1]._id);
+
+ /**
+ * Wait for recipient to finish cloning step.
+ * THEN delete 10 documents on the donor shard, 5 in the migrating chunk and 5 in the remaining
+ *chunk.
+ * AND insert 2 documents on the donor shard, 1 in the migrating chunk and 1 in the remaining
+ *chunk.
+ * AND update 2 documents on the donor shard, 1 in the migrating chunk and 1 in the remaining
+ *chunk.
+ *
+ * This will populate the migration transfer mods log, which the recipient will collect when it
+ * is unpaused.
+ */
+
+ waitForMigrateStep(recipient, migrateStepNames.cloned);
+
+ jsTest.log('Deleting 5 docs from each chunk, migrating chunk and remaining chunk...');
+ assert.writeOK(coll.remove({$and: [{a: {$gte: 5}}, {a: {$lt: 25}}]}));
+
+ jsTest.log('Inserting 1 in the migrating chunk range and 1 in the remaining chunk range...');
+ assert.writeOK(coll.insert({a: 10}));
+ assert.writeOK(coll.insert({a: 30}));
+
+ jsTest.log('Updating 1 in the migrating chunk range and 1 in the remaining chunk range...');
+ assert.writeOK(coll.update({a: 0}, {a: 0, updatedData: "updated"}));
+ assert.writeOK(coll.update({a: 25}, {a: 25, updatedData: "updated"}));
+
+ /**
+ * Finish migration. Unpause recipient migration, wait for it to collect
+ * the new ops from the donor shard's migration transfer mods log, and finish.
+ */
+
+ jsTest.log('Continuing and finishing migration...');
+ unpauseMigrateAtStep(recipient, migrateStepNames.cloned);
+ joinMoveChunk();
+
+ /**
+ * Check documents are where they should be: 6 docs in each shard's respective chunk.
+ */
+
+ jsTest.log('Checking that documents are on the shards they should be...');
+ assert.eq(6, donorColl.count());
+ assert.eq(6, recipientColl.count());
+ assert.eq(12, coll.count());
+
+ /**
+ * Check that the updated documents are where they should be, one on each shard.
+ */
+
+ jsTest.log('Checking that documents were updated correctly...');
+ var donorCollUpdatedNum = donorColl.find({updatedData: "updated"}).count();
+ assert.eq(1, donorCollUpdatedNum, "Update failed on donor shard during migration!");
+ var recipientCollUpdatedNum = recipientColl.find({updatedData: "updated"}).count();
+ assert.eq(1, recipientCollUpdatedNum, "Update failed on recipient shard during migration!");
+
+ jsTest.log('DONE!');
+ st.stop();
+
+})();
diff --git a/src/mongo/s/d_migrate.cpp b/src/mongo/s/d_migrate.cpp
index fdcd48f0ef6..5fa87e71784 100644
--- a/src/mongo/s/d_migrate.cpp
+++ b/src/mongo/s/d_migrate.cpp
@@ -1827,6 +1827,9 @@ MONGO_FP_DECLARE(migrateThreadHangAtStep2);
MONGO_FP_DECLARE(migrateThreadHangAtStep3);
MONGO_FP_DECLARE(migrateThreadHangAtStep4);
MONGO_FP_DECLARE(migrateThreadHangAtStep5);
+MONGO_FP_DECLARE(migrateThreadHangAtStep6);
+
+MONGO_FP_DECLARE(failMigrationReceivedOutOfRangeOperation);
class MigrateStatus {
public:
@@ -1948,7 +1951,7 @@ public:
<< epoch.toString() << endl;
string errmsg;
- MoveTimingHelper timing(txn, "to", ns, min, max, 5 /* steps */, &errmsg, "", "");
+ MoveTimingHelper timing(txn, "to", ns, min, max, 6 /* steps */, &errmsg, "", "");
ScopedDbConnection conn(fromShard);
conn->getLastError(); // just test connection
@@ -2358,6 +2361,8 @@ public:
}
setState(DONE);
+ timing.done(6);
+ MONGO_FP_PAUSE_WHILE(migrateThreadHangAtStep6);
conn.done();
}
@@ -2415,8 +2420,11 @@ public:
BSONObj fullObj;
if (Helpers::findById(txn, ctx.db(), ns.c_str(), id, fullObj)) {
if (!isInRange(fullObj, min, max, shardKeyPattern)) {
- log() << "not applying out of range deletion: " << fullObj << migrateLog;
+ if (MONGO_FAIL_POINT(failMigrationReceivedOutOfRangeOperation)) {
+ invariant(false);
+ }
+ log() << "not applying out of range deletion: " << fullObj << migrateLog;
continue;
}
}
@@ -2447,6 +2455,11 @@ public:
BSONObj updatedDoc = i.next().Obj();
+ if (MONGO_FAIL_POINT(failMigrationReceivedOutOfRangeOperation) &&
+ !isInRange(updatedDoc, min, max, shardKeyPattern)) {
+ invariant(false);
+ }
+
BSONObj localDoc;
if (willOverrideLocalId(
txn, ns, min, max, shardKeyPattern, cx.ctx().db(), updatedDoc, &localDoc)) {
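For reference, the failpoints declared in d_migrate.cpp above (migrateThreadHangAtStep1 through migrateThreadHangAtStep6, and failMigrationReceivedOutOfRangeOperation) are toggled from the shell with configureFailPoint against a shard's admin database, which is what pauseMigrateAtStep/unpauseMigrateAtStep and migration_with_source_ops.js do under the hood. A sketch, assuming a direct shard connection named shardConn:

var shardAdmin = shardConn.getDB('admin');

// Hang the recipient's migration thread after its final (sixth) step.
assert.commandWorked(shardAdmin.runCommand(
    {configureFailPoint: 'migrateThreadHangAtStep6', mode: 'alwaysOn'}));

// Abort (via invariant) if the donor's transfer mods log delivers an out-of-range
// insert/update/delete to the recipient.
assert.commandWorked(shardAdmin.runCommand(
    {configureFailPoint: 'failMigrationReceivedOutOfRangeOperation', mode: 'alwaysOn'}));

// ... run the migration scenario under test ...

assert.commandWorked(shardAdmin.runCommand(
    {configureFailPoint: 'migrateThreadHangAtStep6', mode: 'off'}));
assert.commandWorked(shardAdmin.runCommand(
    {configureFailPoint: 'failMigrationReceivedOutOfRangeOperation', mode: 'off'}));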