summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDianna Hohensee <dianna.hohensee@10gen.com>2016-05-24 10:55:23 -0400
committerDianna Hohensee <dianna.hohensee@10gen.com>2016-06-21 15:22:36 -0400
commitdfe69f949be0e1aa83efcaaea07cc0c4639677bf (patch)
treed7beb97f3db76157520adcfaa8893808bbdb991c
parent1daac14a2bc1cd67edf509075ca2b503bdeebb38 (diff)
downloadmongo-dfe69f949be0e1aa83efcaaea07cc0c4639677bf.tar.gz
SERVER-24031 Prevent cleanupOrphaned from starting if a migration is running.
-rw-r--r--jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js81
-rw-r--r--jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js42
-rw-r--r--src/mongo/db/commands/cleanup_orphaned_cmd.cpp6
3 files changed, 84 insertions, 45 deletions
diff --git a/jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js b/jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js
index 4283abdae48..dc82ab033ed 100644
--- a/jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js
+++ b/jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js
@@ -1,7 +1,9 @@
//
// Tests cleanupOrphaned concurrent with moveChunk.
-// Inserts orphan documents to the donor and recipient shards during the moveChunk and
-// verifies that cleanupOrphaned removes orphans.
+//
+// Inserts orphan documents on the donor and recipient shards during a moveChunk and verifies that
+// cleanupOrphaned removes orphans on the recipient shard and fails on the donor shard --
+// cleanupOrphaned aborts if there is an active migration when it is called.
//
load('./jstests/libs/chunk_manipulation_util.js');
@@ -30,25 +32,25 @@ load('./jstests/libs/cleanup_orphaned_util.js');
assert.commandWorked(admin.runCommand(
{moveChunk: ns, find: {_id: 20}, to: shards[1]._id, _waitForDelete: true}));
- jsTest.log('Inserting 40 docs into shard 0....');
+ jsTest.log('Inserting 20 docs into shard 0....');
for (var i = -20; i < 20; i += 2)
coll.insert({_id: i});
assert.eq(null, coll.getDB().getLastError());
assert.eq(20, donorColl.count());
- jsTest.log('Inserting 25 docs into shard 1....');
+ jsTest.log('Inserting 10 docs into shard 1....');
for (i = 20; i < 40; i += 2)
coll.insert({_id: i});
assert.eq(null, coll.getDB().getLastError());
assert.eq(10, recipientColl.count());
//
- // Start a moveChunk in the background. Move chunk [0, 20), which has 10 docs,
- // from shard 0 to shard 1. Pause it at some points in the donor's and
- // recipient's work flows, and test cleanupOrphaned on shard 0 and shard 1.
+ // Start a moveChunk in the background. Move chunk [0, 20), which has 10 docs, from shard 0 to
+ // shard 1. Pause it at some points in the donor and recipient's work flows, and test
+ // cleanupOrphaned on shard 0 and shard 1.
//
- jsTest.log('setting failpoint startedMoveChunk');
+ jsTest.log('setting failpoint startedMoveChunk (donor) and cloned (recipient)');
pauseMoveChunkAtStep(donor, moveChunkStepNames.startedMoveChunk);
pauseMigrateAtStep(recipient, migrateStepNames.cloned);
var joinMoveChunk = moveChunkParallel(
@@ -75,42 +77,49 @@ load('./jstests/libs/cleanup_orphaned_util.js');
assert.eq(null, recipientColl.getDB().getLastError());
assert.eq(21, recipientColl.count());
- cleanupOrphaned(donor, ns, 2);
- assert.eq(20, donorColl.count());
+ // Cannot clean donor shard -- active migration --, but can clean recipient.
+ assert.throws(function() {
+ cleanupOrphaned(donor, ns, 2);
+ });
+ assert.eq(21, donorColl.count());
cleanupOrphaned(recipient, ns, 2);
assert.eq(20, recipientColl.count());
jsTest.log('Inserting document on donor side');
- // Inserted a new document (not an orphan) with id 19, which belongs in the
- // [0, 20) chunk.
+ // Inserted a new document (not an orphan) with id 19, which belongs in the migrating [0, 20)
+ // chunk.
donorColl.insert({_id: 19});
assert.eq(null, coll.getDB().getLastError());
- assert.eq(21, donorColl.count());
+ assert.eq(22, donorColl.count());
// Recipient transfers this modification.
jsTest.log('Let migrate proceed to transferredMods');
- pauseMigrateAtStep(recipient, migrateStepNames.transferredMods);
- unpauseMigrateAtStep(recipient, migrateStepNames.cloned);
- waitForMigrateStep(recipient, migrateStepNames.transferredMods);
+ proceedToMigrateStep(recipient, migrateStepNames.transferredMods);
jsTest.log('Done letting migrate proceed to transferredMods');
assert.eq(21, recipientColl.count(), "Recipient didn't transfer inserted document.");
- cleanupOrphaned(donor, ns, 2);
- assert.eq(21, donorColl.count());
+ // Still cannot clean donor -- active migration. Clean up runs on recipient, but the pending
+ // range for the migration is safe from cleanupOrphaned.
+ assert.throws(function() {
+ cleanupOrphaned(donor, ns, 2);
+ });
+ assert.eq(22, donorColl.count());
cleanupOrphaned(recipient, ns, 2);
assert.eq(21, recipientColl.count());
// Create orphans.
- donorColl.insert([{_id: 26}]);
+ donorColl.insert([{_id: 27}]);
assert.eq(null, donorColl.getDB().getLastError());
- assert.eq(22, donorColl.count());
+ assert.eq(23, donorColl.count());
recipientColl.insert([{_id: -1}]);
assert.eq(null, recipientColl.getDB().getLastError());
assert.eq(22, recipientColl.count());
- cleanupOrphaned(donor, ns, 2);
- assert.eq(21, donorColl.count());
+ assert.throws(function() {
+ cleanupOrphaned(donor, ns, 2);
+ });
+ assert.eq(23, donorColl.count());
cleanupOrphaned(recipient, ns, 2);
assert.eq(21, recipientColl.count());
@@ -121,26 +130,38 @@ load('./jstests/libs/cleanup_orphaned_util.js');
proceedToMigrateStep(recipient, migrateStepNames.done);
// Create orphans.
- donorColl.insert([{_id: 26}]);
+ donorColl.insert([{_id: 28}]);
assert.eq(null, donorColl.getDB().getLastError());
- assert.eq(22, donorColl.count());
+ assert.eq(24, donorColl.count());
recipientColl.insert([{_id: -1}]);
assert.eq(null, recipientColl.getDB().getLastError());
assert.eq(22, recipientColl.count());
- // cleanupOrphaned should still fail on donor, but should work on the recipient
- cleanupOrphaned(donor, ns, 2);
- assert.eq(10, donorColl.count());
+ assert.throws(function() {
+ cleanupOrphaned(donor, ns, 2);
+ });
+ assert.eq(24, donorColl.count());
cleanupOrphaned(recipient, ns, 2);
assert.eq(21, recipientColl.count());
- // Let migration thread complete.
+ // Let recipient side of the migration finish so that the donor can proceed with the commit and
+ // complete the migration.
unpauseMigrateAtStep(recipient, migrateStepNames.done);
+ waitForMoveChunkStep(donor, moveChunkStepNames.committed);
+
+ // Donor is paused after the migration chunk commit, but before it finishes the cleanup that
+ // includes running the range deleter. Thus it technically has orphaned data -- migration is
+ // complete, but moved data is still present. cleanupOrphaned can remove the data the donor
+ // would otherwise clean up itself in its post-migration delete phase.
+ cleanupOrphaned(donor, ns, 2);
+ assert.eq(10, donorColl.count());
+
+ // Let the donor migration finish.
unpauseMoveChunkAtStep(donor, moveChunkStepNames.committed);
joinMoveChunk();
- // Donor has finished post-move delete.
- cleanupOrphaned(donor, ns, 2); // this is necessary for the count to not be 11
+ // Donor has finished post-move delete, which had nothing to remove with the range deleter
+ // because of the preemptive cleanupOrphaned call.
assert.eq(10, donorColl.count());
assert.eq(21, recipientColl.count());
assert.eq(31, coll.count());
diff --git a/jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js b/jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js
index ecbfc39ad65..a3051725825 100644
--- a/jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js
+++ b/jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js
@@ -53,9 +53,9 @@ load('./jstests/libs/cleanup_orphaned_util.js');
assert.eq(null, coll.getDB().getLastError());
//
- // Start a moveChunk in the background from shard 0 to shard 1. Pause it at
- // some points in the donor's and recipient's work flows, and test
- // cleanupOrphaned.
+ // Start a moveChunk in the background from shard 0 to shard 1. Pause it at some points in the
+ // donor's and recipient's work flows and test cleanupOrphaned: it should fail on the donor
+ // while the migration is active.
//
var donor, recip;
@@ -67,9 +67,9 @@ load('./jstests/libs/cleanup_orphaned_util.js');
donor = st.shard1;
}
- jsTest.log('setting failpoint startedMoveChunk');
+ jsTest.log('setting failpoint startedMoveChunk (donor) and cloned (recipient)');
pauseMoveChunkAtStep(donor, moveChunkStepNames.startedMoveChunk);
- pauseMigrateAtStep(recip, migrateStepNames.cloned);
+ pauseMigrateAtStep(recip, migrateStepNames.transferredMods);
var joinMoveChunk = moveChunkParallel(staticMongod,
st.s0.host,
@@ -79,8 +79,7 @@ load('./jstests/libs/cleanup_orphaned_util.js');
recip.shardName);
waitForMoveChunkStep(donor, moveChunkStepNames.startedMoveChunk);
- waitForMigrateStep(recip, migrateStepNames.cloned);
- proceedToMigrateStep(recip, migrateStepNames.transferredMods);
+ waitForMigrateStep(recip, migrateStepNames.transferredMods);
// recipient has run _recvChunkStart and begun its migration thread;
// 'doc' has been cloned and chunkWithDoc is noted as 'pending' on recipient.
@@ -89,9 +88,11 @@ load('./jstests/libs/cleanup_orphaned_util.js');
assert.eq(1, donorColl.count());
assert.eq(1, recipColl.count());
- // cleanupOrphaned should go through two iterations, since the default chunk
- // setup leaves two unowned ranges on each shard.
- cleanupOrphaned(donor, ns, 2);
+ // cleanupOrphaned should go through two iterations, since the default chunk setup leaves two
+ // unowned ranges on each shard. Command fails on donor shard because of active migration.
+ assert.throws(function() {
+ cleanupOrphaned(donor, ns, 2);
+ });
cleanupOrphaned(recip, ns, 2);
assert.eq(1, donorColl.count());
assert.eq(1, recipColl.count());
@@ -102,15 +103,26 @@ load('./jstests/libs/cleanup_orphaned_util.js');
unpauseMigrateAtStep(recip, migrateStepNames.transferredMods);
proceedToMigrateStep(recip, migrateStepNames.done);
- // cleanupOrphaned removes migrated data from donor. The donor would
- // otherwise clean them up itself, in the post-move delete phase.
- cleanupOrphaned(donor, ns, 2);
- assert.eq(0, donorColl.count());
+ // Donor cannot clean up orphans while there's an active migration.
+ assert.throws(function() {
+ cleanupOrphaned(donor, ns, 2);
+ });
+ assert.eq(1, donorColl.count());
cleanupOrphaned(recip, ns, 2);
assert.eq(1, recipColl.count());
- // Let migration thread complete.
+ // Let recip side of the migration finish so that the donor proceeds with the commit.
unpauseMigrateAtStep(recip, migrateStepNames.done);
+ waitForMoveChunkStep(donor, moveChunkStepNames.committed);
+
+ // Donor is paused after the migration chunk commit, but before it finishes the cleanup that
+ // includes running the range deleter. Thus it technically has orphaned data -- commit is
+ // complete, but moved data is still present. cleanupOrphaned can remove the data the donor
+ // would otherwise clean up itself in its post-move delete phase.
+ cleanupOrphaned(donor, ns, 2);
+ assert.eq(0, donorColl.count());
+
+ // Let migration thread complete.
unpauseMoveChunkAtStep(donor, moveChunkStepNames.committed);
joinMoveChunk();
diff --git a/src/mongo/db/commands/cleanup_orphaned_cmd.cpp b/src/mongo/db/commands/cleanup_orphaned_cmd.cpp
index e3d36168a1f..11414984b2f 100644
--- a/src/mongo/db/commands/cleanup_orphaned_cmd.cpp
+++ b/src/mongo/db/commands/cleanup_orphaned_cmd.cpp
@@ -268,6 +268,12 @@ public:
return false;
}
+ if (shardingState->migrationSourceManager()->isActive()) {
+ errmsg = str::stream() << "Unable to cleanup orphaned documents because there is an "
+ << "active migration";
+ return false;
+ }
+
ChunkVersion shardVersion;
status = shardingState->refreshMetadataNow(txn, ns, &shardVersion);
if (!status.isOK()) {