summaryrefslogtreecommitdiff
path: root/src/mongo/db/s/balancer
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/db/s/balancer')
-rw-r--r--src/mongo/db/s/balancer/migration_manager.cpp188
-rw-r--r--src/mongo/db/s/balancer/migration_manager.h36
-rw-r--r--src/mongo/db/s/balancer/migration_manager_test.cpp257
3 files changed, 58 insertions, 423 deletions
diff --git a/src/mongo/db/s/balancer/migration_manager.cpp b/src/mongo/db/s/balancer/migration_manager.cpp
index bd8465338f0..6bccabf7326 100644
--- a/src/mongo/db/s/balancer/migration_manager.cpp
+++ b/src/mongo/db/s/balancer/migration_manager.cpp
@@ -61,15 +61,17 @@ using str::stream;
namespace {
-const char kChunkTooBig[] = "chunkTooBig";
+const char kChunkTooBig[] = "chunkTooBig"; // TODO: delete in 3.8
const WriteConcernOptions kMajorityWriteConcern(WriteConcernOptions::kMajority,
WriteConcernOptions::SyncMode::UNSET,
Seconds(15));
/**
* Parses the 'commandResponse' and converts it to a status to use as the outcome of the command.
- * Preserves backwards compatibility with 3.2 and earlier shards that, rather than use a ChunkTooBig
+ * Preserves backwards compatibility with 3.4 and earlier shards that, rather than use a ChunkTooBig
* error code, include an extra field in the response.
+ *
+ * TODO: Delete in 3.8
*/
Status extractMigrationStatusFromCommandResponse(const BSONObj& commandResponse) {
Status commandStatus = getStatusFromCommandResult(commandResponse);
@@ -98,23 +100,13 @@ StatusWith<DistLockHandle> acquireDistLock(OperationContext* txn,
txn, nss.ns(), whyMessage, lockSessionID, DistLockManager::kSingleLockAttemptTimeout);
if (!statusWithDistLockHandle.isOK()) {
- // If we get LockBusy while trying to acquire the collection distributed lock, this implies
- // that a concurrent collection operation is running either on a 3.2 shard or on mongos.
- // Convert it to ConflictingOperationInProgress to better indicate the error.
- //
- // In addition, the code which re-schedules parallel migrations serially for 3.2 shard
- // compatibility uses the LockBusy code as a hint to do the reschedule.
- const ErrorCodes::Error code = (statusWithDistLockHandle == ErrorCodes::LockBusy
- ? ErrorCodes::ConflictingOperationInProgress
- : statusWithDistLockHandle.getStatus().code());
-
- return {code,
+ return {statusWithDistLockHandle.getStatus().code(),
stream() << "Could not acquire collection lock for " << nss.ns()
<< " to migrate chunks, due to "
<< statusWithDistLockHandle.getStatus().reason()};
}
- return std::move(statusWithDistLockHandle.getValue());
+ return statusWithDistLockHandle;
}
/**
@@ -134,7 +126,7 @@ MigrationManager::MigrationManager(ServiceContext* serviceContext)
MigrationManager::~MigrationManager() {
// The migration manager must be completely quiesced at destruction time
- invariant(_activeMigrationsWithoutDistLock.empty());
+ invariant(_activeMigrations.empty());
}
MigrationStatuses MigrationManager::executeMigrationsForAutoBalance(
@@ -146,8 +138,6 @@ MigrationStatuses MigrationManager::executeMigrationsForAutoBalance(
MigrationStatuses migrationStatuses;
- vector<MigrateInfo> rescheduledMigrations;
-
{
std::map<MigrationIdentifier, ScopedMigrationRequest> scopedMigrationRequests;
vector<std::pair<shared_ptr<Notification<RemoteCommandResponse>>, MigrateInfo>> responses;
@@ -165,18 +155,12 @@ MigrationStatuses MigrationManager::executeMigrationsForAutoBalance(
scopedMigrationRequests.emplace(migrateInfo.getName(),
std::move(statusWithScopedMigrationRequest.getValue()));
- responses.emplace_back(_schedule(txn,
- migrateInfo,
- false, // Config server takes the collection dist lock
- maxChunkSizeBytes,
- secondaryThrottle,
- waitForDelete),
- migrateInfo);
+ responses.emplace_back(
+ _schedule(txn, migrateInfo, maxChunkSizeBytes, secondaryThrottle, waitForDelete),
+ migrateInfo);
}
- // Wait for all the scheduled migrations to complete and note the ones, which failed with a
- // LockBusy error code. These need to be executed serially, without the distributed lock
- // being held by the config server for backwards compatibility with 3.2 shards.
+ // Wait for all the scheduled migrations to complete.
for (auto& response : responses) {
auto notification = std::move(response.first);
auto migrateInfo = std::move(response.second);
@@ -187,39 +171,8 @@ MigrationStatuses MigrationManager::executeMigrationsForAutoBalance(
invariant(it != scopedMigrationRequests.end());
Status commandStatus =
_processRemoteCommandResponse(remoteCommandResponse, &it->second);
- if (commandStatus == ErrorCodes::LockBusy) {
- rescheduledMigrations.emplace_back(std::move(migrateInfo));
- } else {
- migrationStatuses.emplace(migrateInfo.getName(), std::move(commandStatus));
- }
- }
- }
-
- // Schedule all 3.2 compatibility migrations sequentially
- for (const auto& migrateInfo : rescheduledMigrations) {
- // Write a document to the config.migrations collection, in case this migration must be
- // recovered by the Balancer. Fail if the chunk is already moving.
- auto statusWithScopedMigrationRequest =
- ScopedMigrationRequest::writeMigration(txn, migrateInfo, waitForDelete);
- if (!statusWithScopedMigrationRequest.isOK()) {
- migrationStatuses.emplace(migrateInfo.getName(),
- std::move(statusWithScopedMigrationRequest.getStatus()));
- continue;
+ migrationStatuses.emplace(migrateInfo.getName(), std::move(commandStatus));
}
-
- RemoteCommandResponse remoteCommandResponse =
- _schedule(txn,
- migrateInfo,
- true, // Shard takes the collection dist lock
- maxChunkSizeBytes,
- secondaryThrottle,
- waitForDelete)
- ->get();
-
- Status commandStatus = _processRemoteCommandResponse(
- remoteCommandResponse, &statusWithScopedMigrationRequest.getValue());
-
- migrationStatuses.emplace(migrateInfo.getName(), std::move(commandStatus));
}
invariant(migrationStatuses.size() == migrateInfos.size());
@@ -244,13 +197,7 @@ Status MigrationManager::executeManualMigration(
}
RemoteCommandResponse remoteCommandResponse =
- _schedule(txn,
- migrateInfo,
- false, // Config server takes the collection dist lock
- maxChunkSizeBytes,
- secondaryThrottle,
- waitForDelete)
- ->get();
+ _schedule(txn, migrateInfo, maxChunkSizeBytes, secondaryThrottle, waitForDelete)->get();
auto scopedCMStatus = ScopedChunkManager::refreshAndGet(txn, NamespaceString(migrateInfo.ns));
if (!scopedCMStatus.isOK()) {
@@ -343,10 +290,7 @@ void MigrationManager::startRecoveryAndAcquireDistLocks(OperationContext* txn) {
auto statusWithDistLockHandle = distLockManager->tryLockWithLocalWriteConcern(
txn, migrateType.getNss().ns(), whyMessage, _lockSessionID);
- if (!statusWithDistLockHandle.isOK() &&
- statusWithDistLockHandle.getStatus() != ErrorCodes::LockBusy) {
- // LockBusy is alright because that should mean a 3.2 shard has it for the active
- // migration.
+ if (!statusWithDistLockHandle.isOK()) {
log() << "Failed to acquire distributed lock for collection '"
<< migrateType.getNss().ns()
<< "' during balancer recovery of an active migration. Abandoning"
@@ -432,12 +376,8 @@ void MigrationManager::finishRecovery(OperationContext* txn,
scheduledMigrations++;
- responses.emplace_back(_schedule(txn,
- migrationInfo,
- false, // Config server takes the collection dist lock
- maxChunkSizeBytes,
- secondaryThrottle,
- waitForDelete));
+ responses.emplace_back(
+ _schedule(txn, migrationInfo, maxChunkSizeBytes, secondaryThrottle, waitForDelete));
}
// If no migrations were scheduled for this namespace, free the dist lock
@@ -473,7 +413,7 @@ void MigrationManager::interruptAndDisableMigrations() {
_state = State::kStopping;
// Interrupt any active migrations with dist lock
- for (auto& cmsEntry : _activeMigrationsWithDistLock) {
+ for (auto& cmsEntry : _activeMigrations) {
auto* cms = &cmsEntry.second;
for (auto& migration : cms->migrations) {
@@ -483,13 +423,6 @@ void MigrationManager::interruptAndDisableMigrations() {
}
}
- // Interrupt any active migrations without dist lock
- for (auto& migration : _activeMigrationsWithoutDistLock) {
- if (migration.callbackHandle) {
- executor->cancel(*migration.callbackHandle);
- }
- }
-
_checkDrained_inlock();
}
@@ -499,18 +432,13 @@ void MigrationManager::drainActiveMigrations() {
if (_state == State::kStopped)
return;
invariant(_state == State::kStopping);
-
- _condVar.wait(lock, [this] {
- return _activeMigrationsWithDistLock.empty() && _activeMigrationsWithoutDistLock.empty();
- });
-
+ _condVar.wait(lock, [this] { return _activeMigrations.empty(); });
_state = State::kStopped;
}
shared_ptr<Notification<RemoteCommandResponse>> MigrationManager::_schedule(
OperationContext* txn,
const MigrateInfo& migrateInfo,
- bool shardTakesCollectionDistLock,
uint64_t maxChunkSizeBytes,
const MigrationSecondaryThrottleOptions& secondaryThrottle,
bool waitForDelete) {
@@ -575,8 +503,7 @@ shared_ptr<Notification<RemoteCommandResponse>> MigrationManager::_schedule(
chunk->getLastmod(),
maxChunkSizeBytes,
secondaryThrottle,
- waitForDelete,
- shardTakesCollectionDistLock);
+ waitForDelete);
stdx::lock_guard<stdx::mutex> lock(_mutex);
@@ -590,24 +517,20 @@ shared_ptr<Notification<RemoteCommandResponse>> MigrationManager::_schedule(
auto retVal = migration.completionNotification;
- if (shardTakesCollectionDistLock) {
- _scheduleWithoutDistLock_inlock(txn, fromHostStatus.getValue(), std::move(migration));
- } else {
- _scheduleWithDistLock_inlock(txn, fromHostStatus.getValue(), std::move(migration));
- }
+ _schedule_inlock(txn, fromHostStatus.getValue(), std::move(migration));
return retVal;
}
-void MigrationManager::_scheduleWithDistLock_inlock(OperationContext* txn,
- const HostAndPort& targetHost,
- Migration migration) {
+void MigrationManager::_schedule_inlock(OperationContext* txn,
+ const HostAndPort& targetHost,
+ Migration migration) {
executor::TaskExecutor* const executor = Grid::get(txn)->getExecutorPool()->getFixedExecutor();
const NamespaceString nss(migration.nss);
- auto it = _activeMigrationsWithDistLock.find(nss);
- if (it == _activeMigrationsWithDistLock.end()) {
+ auto it = _activeMigrations.find(nss);
+ if (it == _activeMigrations.end()) {
// Acquire the collection distributed lock (blocking call)
auto distLockHandleStatus = acquireDistLock(txn, _lockSessionID, nss);
if (!distLockHandleStatus.isOK()) {
@@ -615,7 +538,7 @@ void MigrationManager::_scheduleWithDistLock_inlock(OperationContext* txn,
return;
}
- it = _activeMigrationsWithDistLock
+ it = _activeMigrations
.insert(std::make_pair(
nss, CollectionMigrationsState(std::move(distLockHandleStatus.getValue()))))
.first;
@@ -640,7 +563,7 @@ void MigrationManager::_scheduleWithDistLock_inlock(OperationContext* txn,
auto txn = cc().makeOperationContext();
stdx::lock_guard<stdx::mutex> lock(_mutex);
- _completeWithDistLock_inlock(txn.get(), itMigration, args.response);
+ _complete_inlock(txn.get(), itMigration, args.response);
});
if (callbackHandleWithStatus.isOK()) {
@@ -648,13 +571,12 @@ void MigrationManager::_scheduleWithDistLock_inlock(OperationContext* txn,
return;
}
- _completeWithDistLock_inlock(txn, itMigration, std::move(callbackHandleWithStatus.getStatus()));
+ _complete_inlock(txn, itMigration, std::move(callbackHandleWithStatus.getStatus()));
}
-void MigrationManager::_completeWithDistLock_inlock(
- OperationContext* txn,
- MigrationsList::iterator itMigration,
- const RemoteCommandResponse& remoteCommandResponse) {
+void MigrationManager::_complete_inlock(OperationContext* txn,
+ MigrationsList::iterator itMigration,
+ const RemoteCommandResponse& remoteCommandResponse) {
const NamespaceString nss(itMigration->nss);
// Make sure to signal the notification last, after the distributed lock is freed, so that we
@@ -662,8 +584,8 @@ void MigrationManager::_completeWithDistLock_inlock(
// still acquired.
auto notificationToSignal = itMigration->completionNotification;
- auto it = _activeMigrationsWithDistLock.find(nss);
- invariant(it != _activeMigrationsWithDistLock.end());
+ auto it = _activeMigrations.find(nss);
+ invariant(it != _activeMigrations.end());
auto collectionMigrationState = &it->second;
collectionMigrationState->migrations.erase(itMigration);
@@ -671,58 +593,20 @@ void MigrationManager::_completeWithDistLock_inlock(
if (collectionMigrationState->migrations.empty()) {
Grid::get(txn)->catalogClient(txn)->getDistLockManager()->unlock(
txn, collectionMigrationState->distLockHandle, nss.ns());
- _activeMigrationsWithDistLock.erase(it);
+ _activeMigrations.erase(it);
_checkDrained_inlock();
}
notificationToSignal->set(remoteCommandResponse);
}
-void MigrationManager::_scheduleWithoutDistLock_inlock(OperationContext* txn,
- const HostAndPort& targetHost,
- Migration migration) {
- executor::TaskExecutor* const executor = Grid::get(txn)->getExecutorPool()->getFixedExecutor();
-
- _activeMigrationsWithoutDistLock.push_front(std::move(migration));
- auto itMigration = _activeMigrationsWithoutDistLock.begin();
-
- const RemoteCommandRequest remoteRequest(
- targetHost, NamespaceString::kAdminDb.toString(), itMigration->moveChunkCmdObj, txn);
-
- StatusWith<executor::TaskExecutor::CallbackHandle> callbackHandleWithStatus =
- executor->scheduleRemoteCommand(
- remoteRequest,
- [this, itMigration](const executor::TaskExecutor::RemoteCommandCallbackArgs& args) {
- auto notificationToSignal = itMigration->completionNotification;
-
- stdx::lock_guard<stdx::mutex> lock(_mutex);
-
- _activeMigrationsWithoutDistLock.erase(itMigration);
- _checkDrained_inlock();
-
- notificationToSignal->set(args.response);
- });
-
- if (callbackHandleWithStatus.isOK()) {
- itMigration->callbackHandle = std::move(callbackHandleWithStatus.getValue());
- return;
- }
-
- auto notificationToSignal = itMigration->completionNotification;
-
- _activeMigrationsWithoutDistLock.erase(itMigration);
- _checkDrained_inlock();
-
- notificationToSignal->set(std::move(callbackHandleWithStatus.getStatus()));
-}
-
void MigrationManager::_checkDrained_inlock() {
if (_state == State::kEnabled || _state == State::kRecovering) {
return;
}
invariant(_state == State::kStopping);
- if (_activeMigrationsWithDistLock.empty() && _activeMigrationsWithoutDistLock.empty()) {
+ if (_activeMigrations.empty()) {
_condVar.notify_all();
}
}
@@ -758,6 +642,7 @@ void MigrationManager::_abandonActiveMigrationsAndEnableManager(OperationContext
Status MigrationManager::_processRemoteCommandResponse(
const RemoteCommandResponse& remoteCommandResponse,
ScopedMigrationRequest* scopedMigrationRequest) {
+
stdx::lock_guard<stdx::mutex> lock(_mutex);
Status commandStatus(ErrorCodes::InternalError, "Uninitialized value.");
@@ -774,6 +659,7 @@ Status MigrationManager::_processRemoteCommandResponse(
if (!remoteCommandResponse.isOK()) {
commandStatus = remoteCommandResponse.status;
} else {
+ // TODO: delete in 3.8
commandStatus = extractMigrationStatusFromCommandResponse(remoteCommandResponse.data);
}
diff --git a/src/mongo/db/s/balancer/migration_manager.h b/src/mongo/db/s/balancer/migration_manager.h
index b6ae1dd3ff3..b9611498148 100644
--- a/src/mongo/db/s/balancer/migration_manager.h
+++ b/src/mongo/db/s/balancer/migration_manager.h
@@ -60,8 +60,6 @@ typedef std::map<MigrationIdentifier, Status> MigrationStatuses;
/**
* Manages and executes parallel migrations for the balancer.
- *
- * TODO: for v3.6, remove code making compatible with v3.2 shards that take distlock.
*/
class MigrationManager {
MONGO_DISALLOW_COPYING(MigrationManager);
@@ -202,14 +200,10 @@ private:
* specified parameters. May block for distributed lock acquisition. If dist lock acquisition is
* successful (or not done), schedules the migration request and returns a notification which
* can be used to obtain the outcome of the operation.
- *
- * The 'shardTakesCollectionDistLock' parameter controls whether the distributed lock is
- * acquired by the migration manager or by the shard executing the migration request.
*/
std::shared_ptr<Notification<executor::RemoteCommandResponse>> _schedule(
OperationContext* txn,
const MigrateInfo& migrateInfo,
- bool shardTakesCollectionDistLock,
uint64_t maxChunkSizeBytes,
const MigrationSecondaryThrottleOptions& secondaryThrottle,
bool waitForDelete);
@@ -221,9 +215,9 @@ private:
* The distributed lock is acquired before scheduling the first migration for the collection and
* is only released when all active migrations on the collection have finished.
*/
- void _scheduleWithDistLock_inlock(OperationContext* txn,
- const HostAndPort& targetHost,
- Migration migration);
+ void _schedule_inlock(OperationContext* txn,
+ const HostAndPort& targetHost,
+ Migration migration);
/**
* Used internally for migrations scheduled with the distributed lock acquired by the config
@@ -231,21 +225,9 @@ private:
* passed iterator and if this is the last migration for the collection will free the collection
* distributed lock.
*/
- void _completeWithDistLock_inlock(OperationContext* txn,
- MigrationsList::iterator itMigration,
- const executor::RemoteCommandResponse& remoteCommandResponse);
-
- /**
- * Immediately schedules the specified migration without attempting to acquire the collection
- * distributed lock or checking that it is not being held.
- *
- * This method is only used for retrying migrations that have failed with LockBusy errors
- * returned by the shard, which only happens with legacy 3.2 shards that take the collection
- * distributed lock themselves.
- */
- void _scheduleWithoutDistLock_inlock(OperationContext* txn,
- const HostAndPort& targetHost,
- Migration migration);
+ void _complete_inlock(OperationContext* txn,
+ MigrationsList::iterator itMigration,
+ const executor::RemoteCommandResponse& remoteCommandResponse);
/**
* If the state of the migration manager is kStopping, checks whether there are any outstanding
@@ -306,11 +288,7 @@ private:
// Holds information about each collection's distributed lock and active migrations via a
// CollectionMigrationState object.
- CollectionMigrationsStateMap _activeMigrationsWithDistLock;
-
- // Holds information about migrations, which have been scheduled without the collection
- // distributed lock acquired (i.e., the shard is asked to acquire it).
- MigrationsList _activeMigrationsWithoutDistLock;
+ CollectionMigrationsStateMap _activeMigrations;
};
} // namespace mongo
diff --git a/src/mongo/db/s/balancer/migration_manager_test.cpp b/src/mongo/db/s/balancer/migration_manager_test.cpp
index 1c5ead4acbf..bcca90e4e25 100644
--- a/src/mongo/db/s/balancer/migration_manager_test.cpp
+++ b/src/mongo/db/s/balancer/migration_manager_test.cpp
@@ -128,11 +128,9 @@ protected:
*/
void expectMoveChunkCommand(const ChunkType& chunk,
const ShardId& toShardId,
- const bool& takeDistLock,
const BSONObj& response);
void expectMoveChunkCommand(const ChunkType& chunk,
const ShardId& toShardId,
- const bool& takeDistLock,
const Status& returnStatus);
// Random static initialization order can result in X constructor running before Y constructor
@@ -260,9 +258,8 @@ void MigrationManagerTest::checkMigrationsCollectionIsEmptyAndLocksAreUnlocked()
void MigrationManagerTest::expectMoveChunkCommand(const ChunkType& chunk,
const ShardId& toShardId,
- const bool& takeDistLock,
const BSONObj& response) {
- onCommand([&chunk, &toShardId, &takeDistLock, &response](const RemoteCommandRequest& request) {
+ onCommand([&chunk, &toShardId, &response](const RemoteCommandRequest& request) {
NamespaceString nss(request.cmdObj.firstElement().valueStringData());
ASSERT_EQ(chunk.getNS(), nss.ns());
@@ -276,7 +273,6 @@ void MigrationManagerTest::expectMoveChunkCommand(const ChunkType& chunk,
ASSERT_EQ(chunk.getShard(), moveChunkRequestWithStatus.getValue().getFromShardId());
ASSERT_EQ(toShardId, moveChunkRequestWithStatus.getValue().getToShardId());
- ASSERT_EQ(takeDistLock, moveChunkRequestWithStatus.getValue().getTakeDistLock());
return response;
});
@@ -284,11 +280,10 @@ void MigrationManagerTest::expectMoveChunkCommand(const ChunkType& chunk,
void MigrationManagerTest::expectMoveChunkCommand(const ChunkType& chunk,
const ShardId& toShardId,
- const bool& takeDistLock,
const Status& returnStatus) {
BSONObjBuilder resultBuilder;
Command::appendCommandStatus(resultBuilder, returnStatus);
- expectMoveChunkCommand(chunk, toShardId, takeDistLock, resultBuilder.obj());
+ expectMoveChunkCommand(chunk, toShardId, resultBuilder.obj());
}
TEST_F(MigrationManagerTest, OneCollectionTwoMigrations) {
@@ -334,8 +329,8 @@ TEST_F(MigrationManagerTest, OneCollectionTwoMigrations) {
});
// Expect two moveChunk commands.
- expectMoveChunkCommand(chunk1, kShardId1, false, Status::OK());
- expectMoveChunkCommand(chunk2, kShardId3, false, Status::OK());
+ expectMoveChunkCommand(chunk1, kShardId1, Status::OK());
+ expectMoveChunkCommand(chunk2, kShardId3, Status::OK());
// Run the MigrationManager code.
future.timed_get(kFutureTimeout);
@@ -396,238 +391,15 @@ TEST_F(MigrationManagerTest, TwoCollectionsTwoMigrationsEach) {
});
// Expect four moveChunk commands.
- expectMoveChunkCommand(chunk1coll1, kShardId1, false, Status::OK());
- expectMoveChunkCommand(chunk2coll1, kShardId3, false, Status::OK());
- expectMoveChunkCommand(chunk1coll2, kShardId1, false, Status::OK());
- expectMoveChunkCommand(chunk2coll2, kShardId3, false, Status::OK());
+ expectMoveChunkCommand(chunk1coll1, kShardId1, Status::OK());
+ expectMoveChunkCommand(chunk2coll1, kShardId3, Status::OK());
+ expectMoveChunkCommand(chunk1coll2, kShardId1, Status::OK());
+ expectMoveChunkCommand(chunk2coll2, kShardId3, Status::OK());
// Run the MigrationManager code.
future.timed_get(kFutureTimeout);
}
-// Old v3.2 shards expect to take the distributed lock before executing a moveChunk command. The
-// MigrationManager should take the distlock and fail the first moveChunk command with an old shard,
-// and then release the lock and retry the command successfully.
-TEST_F(MigrationManagerTest, SameCollectionOldShardMigration) {
- // Set up two shards in the metadata.
- ASSERT_OK(catalogClient()->insertConfigDocument(
- operationContext(), ShardType::ConfigNS, kShard0, kMajorityWriteConcern));
- ASSERT_OK(catalogClient()->insertConfigDocument(
- operationContext(), ShardType::ConfigNS, kShard2, kMajorityWriteConcern));
-
- // Set up the database and collection as sharded in the metadata.
- std::string dbName = "foo";
- std::string collName = "foo.bar";
- ChunkVersion version(2, 0, OID::gen());
-
- setUpDatabase(dbName, kShardId0);
- setUpCollection(collName, version);
-
- // Set up two chunks in the metadata.
- ChunkType chunk1 =
- setUpChunk(collName, kKeyPattern.globalMin(), BSON(kPattern << 49), kShardId0, version);
- version.incMinor();
- ChunkType chunk2 =
- setUpChunk(collName, BSON(kPattern << 49), kKeyPattern.globalMax(), kShardId2, version);
-
- // Going to request that these two chunks get migrated.
- const std::vector<MigrateInfo> migrationRequests{{kShardId1, chunk1}, {kShardId3, chunk2}};
-
- auto future = launchAsync([this, migrationRequests] {
- Client::initThreadIfNotAlready("Test");
- auto txn = cc().makeOperationContext();
-
- // Scheduling the moveChunk commands requires finding a host to which to send the command.
- // Set up dummy hosts for the source shards.
- shardTargeterMock(txn.get(), kShardId0)->setFindHostReturnValue(kShardHost0);
- shardTargeterMock(txn.get(), kShardId2)->setFindHostReturnValue(kShardHost2);
-
- MigrationStatuses migrationStatuses = _migrationManager->executeMigrationsForAutoBalance(
- txn.get(), migrationRequests, 0, kDefaultSecondaryThrottle, false);
-
- for (const auto& migrateInfo : migrationRequests) {
- ASSERT_OK(migrationStatuses.at(migrateInfo.getName()));
- }
- });
-
- // Expect two moveChunk commands.
- expectMoveChunkCommand(
- chunk1,
- kShardId1,
- false,
- Status(ErrorCodes::LockBusy, "SameCollectionOldShardMigration generated error."));
- expectMoveChunkCommand(chunk2, kShardId3, false, Status::OK());
- expectMoveChunkCommand(chunk1, kShardId1, true, Status::OK());
-
- // Run the MigrationManager code.
- future.timed_get(kFutureTimeout);
-}
-
-// Fail a migration if an old v3.2 shard fails to acquire the distributed lock more than once. The
-// first LockBusy error identifies the shard as an old shard to the MigrationManager, the second
-// indicates the lock is held elsewhere and unavailable.
-TEST_F(MigrationManagerTest, SameOldShardFailsToAcquireDistributedLockTwice) {
- // Set up a shard in the metadata.
- ASSERT_OK(catalogClient()->insertConfigDocument(
- operationContext(), ShardType::ConfigNS, kShard0, kMajorityWriteConcern));
-
- // Set up the database and collection as sharded in the metadata.
- std::string dbName = "foo";
- std::string collName = "foo.bar";
- ChunkVersion version(2, 0, OID::gen());
-
- setUpDatabase(dbName, kShardId0);
- setUpCollection(collName, version);
-
- // Set up a chunk in the metadata.
- ChunkType chunk1 =
- setUpChunk(collName, kKeyPattern.globalMin(), kKeyPattern.globalMax(), kShardId0, version);
-
- // Going to request that this chunk get migrated.
- const std::vector<MigrateInfo> migrationRequests{{kShardId1, chunk1}};
-
- auto future = launchAsync([this, migrationRequests] {
- Client::initThreadIfNotAlready("Test");
- auto txn = cc().makeOperationContext();
-
- // Scheduling the moveChunk commands requires finding a host to which to send the command.
- // Set up a dummy host for the source shard.
- shardTargeterMock(txn.get(), kShardId0)->setFindHostReturnValue(kShardHost0);
-
- MigrationStatuses migrationStatuses = _migrationManager->executeMigrationsForAutoBalance(
- txn.get(), migrationRequests, 0, kDefaultSecondaryThrottle, false);
-
- for (const auto& migrateInfo : migrationRequests) {
- ASSERT_EQ(ErrorCodes::LockBusy, migrationStatuses.at(migrateInfo.getName()));
- }
- });
-
- // Expect two sequential moveChunk commands to the same shard, both of which fail with LockBusy.
- expectMoveChunkCommand(
- chunk1,
- kShardId1,
- false,
- Status(ErrorCodes::LockBusy, "SameCollectionOldShardMigrations generated error."));
- expectMoveChunkCommand(
- chunk1,
- kShardId1,
- true,
- Status(ErrorCodes::LockBusy, "SameCollectionOldShardMigrations generated error."));
-
- // Run the MigrationManager code.
- future.timed_get(kFutureTimeout);
-}
-
-// If in the same collection a migration is scheduled with an old v3.2 shard, a second migration in
-// the collection with a different old v3.2 shard should get rescheduled.
-TEST_F(MigrationManagerTest, SameCollectionTwoOldShardMigrations) {
- // Set up two shards in the metadata.
- ASSERT_OK(catalogClient()->insertConfigDocument(
- operationContext(), ShardType::ConfigNS, kShard0, kMajorityWriteConcern));
- ASSERT_OK(catalogClient()->insertConfigDocument(
- operationContext(), ShardType::ConfigNS, kShard2, kMajorityWriteConcern));
-
- // Set up the database and collection as sharded in the metadata.
- std::string dbName = "foo";
- std::string collName = "foo.bar";
- ChunkVersion version(2, 0, OID::gen());
-
- setUpDatabase(dbName, kShardId0);
- setUpCollection(collName, version);
-
- // Set up two chunks in the metadata.
- ChunkType chunk1 =
- setUpChunk(collName, kKeyPattern.globalMin(), BSON(kPattern << 49), kShardId0, version);
- version.incMinor();
- ChunkType chunk2 =
- setUpChunk(collName, BSON(kPattern << 49), kKeyPattern.globalMax(), kShardId2, version);
-
- // Going to request that these two chunks get migrated.
- const std::vector<MigrateInfo> migrationRequests{{kShardId1, chunk1}, {kShardId3, chunk2}};
-
- auto future = launchAsync([this, migrationRequests] {
- Client::initThreadIfNotAlready("Test");
- auto txn = cc().makeOperationContext();
-
- // Scheduling the moveChunk commands requires finding a host to which to send the command.
- // Set up dummy hosts for the source shards.
- shardTargeterMock(txn.get(), kShardId0)->setFindHostReturnValue(kShardHost0);
- shardTargeterMock(txn.get(), kShardId2)->setFindHostReturnValue(kShardHost2);
-
- MigrationStatuses migrationStatuses = _migrationManager->executeMigrationsForAutoBalance(
- txn.get(), migrationRequests, 0, kDefaultSecondaryThrottle, false);
-
- for (const auto& migrateInfo : migrationRequests) {
- ASSERT_OK(migrationStatuses.at(migrateInfo.getName()));
- }
- });
-
- // Expect two failed moveChunk commands, then two successful moveChunk commands after the
- // balancer releases the distributed lock.
- expectMoveChunkCommand(
- chunk1,
- kShardId1,
- false,
- Status(ErrorCodes::LockBusy, "SameCollectionOldShardMigration generated error."));
- expectMoveChunkCommand(
- chunk2,
- kShardId3,
- false,
- Status(ErrorCodes::LockBusy, "SameCollectionOldShardMigration generated error."));
- expectMoveChunkCommand(chunk1, kShardId1, true, Status::OK());
- expectMoveChunkCommand(chunk2, kShardId3, true, Status::OK());
-
- // Run the MigrationManager code.
- future.timed_get(kFutureTimeout);
-}
-
-// Takes the distributed lock for a collection so that that the MigrationManager is unable to
-// schedule migrations for that collection.
-TEST_F(MigrationManagerTest, FailToAcquireDistributedLock) {
- // Set up two shards in the metadata.
- ASSERT_OK(catalogClient()->insertConfigDocument(
- operationContext(), ShardType::ConfigNS, kShard0, kMajorityWriteConcern));
- ASSERT_OK(catalogClient()->insertConfigDocument(
- operationContext(), ShardType::ConfigNS, kShard2, kMajorityWriteConcern));
-
- // Set up the database and collection as sharded in the metadata.
- std::string dbName = "foo";
- std::string collName = "foo.bar";
- ChunkVersion version(2, 0, OID::gen());
-
- setUpDatabase(dbName, kShardId0);
- setUpCollection(collName, version);
-
- // Set up two chunks in the metadata.
- ChunkType chunk1 =
- setUpChunk(collName, kKeyPattern.globalMin(), BSON(kPattern << 49), kShardId0, version);
- version.incMinor();
- ChunkType chunk2 =
- setUpChunk(collName, BSON(kPattern << 49), kKeyPattern.globalMax(), kShardId2, version);
-
- // Going to request that these two chunks get migrated.
- const std::vector<MigrateInfo> migrationRequests{{kShardId1, chunk1}, {kShardId3, chunk2}};
-
- shardTargeterMock(operationContext(), kShardId0)->setFindHostReturnValue(kShardHost0);
- shardTargeterMock(operationContext(), kShardId2)->setFindHostReturnValue(kShardHost2);
-
- // Take the distributed lock for the collection before scheduling via the MigrationManager.
- const std::string whyMessage("FailToAcquireDistributedLock unit-test taking distributed lock");
- DistLockManager::ScopedDistLock distLockStatus = assertGet(
- catalogClient()->getDistLockManager()->lock(operationContext(),
- chunk1.getNS(),
- whyMessage,
- DistLockManager::kSingleLockAttemptTimeout));
-
- MigrationStatuses migrationStatuses = _migrationManager->executeMigrationsForAutoBalance(
- operationContext(), migrationRequests, 0, kDefaultSecondaryThrottle, false);
-
- for (const auto& migrateInfo : migrationRequests) {
- ASSERT_EQ(ErrorCodes::ConflictingOperationInProgress,
- migrationStatuses.at(migrateInfo.getName()));
- }
-}
-
// The MigrationManager should fail the migration if a host is not found for the source shard.
// Scheduling a moveChunk command requires finding a host to which to send the command.
TEST_F(MigrationManagerTest, SourceShardNotFound) {
@@ -674,12 +446,13 @@ TEST_F(MigrationManagerTest, SourceShardNotFound) {
});
// Expect only one moveChunk command to be called.
- expectMoveChunkCommand(chunk1, kShardId1, false, Status::OK());
+ expectMoveChunkCommand(chunk1, kShardId1, Status::OK());
// Run the MigrationManager code.
future.timed_get(kFutureTimeout);
}
+// TODO: Delete in 3.8
TEST_F(MigrationManagerTest, JumboChunkResponseBackwardsCompatibility) {
// Set up one shard in the metadata.
ASSERT_OK(catalogClient()->insertConfigDocument(
@@ -715,7 +488,7 @@ TEST_F(MigrationManagerTest, JumboChunkResponseBackwardsCompatibility) {
});
// Expect only one moveChunk command to be called.
- expectMoveChunkCommand(chunk1, kShardId1, false, BSON("ok" << 0 << "chunkTooBig" << true));
+ expectMoveChunkCommand(chunk1, kShardId1, BSON("ok" << 0 << "chunkTooBig" << true));
// Run the MigrationManager code.
future.timed_get(kFutureTimeout);
@@ -839,7 +612,7 @@ TEST_F(MigrationManagerTest, RestartMigrationManager) {
});
// Expect only one moveChunk command to be called.
- expectMoveChunkCommand(chunk1, kShardId1, false, Status::OK());
+ expectMoveChunkCommand(chunk1, kShardId1, Status::OK());
// Run the MigrationManager code.
future.timed_get(kFutureTimeout);
@@ -893,8 +666,8 @@ TEST_F(MigrationManagerTest, MigrationRecovery) {
});
// Expect two moveChunk commands.
- expectMoveChunkCommand(chunk1, kShardId1, false, Status::OK());
- expectMoveChunkCommand(chunk2, kShardId3, false, Status::OK());
+ expectMoveChunkCommand(chunk1, kShardId1, Status::OK());
+ expectMoveChunkCommand(chunk2, kShardId3, Status::OK());
// Run the MigrationManager code.
future.timed_get(kFutureTimeout);
@@ -1005,7 +778,6 @@ TEST_F(MigrationManagerTest, RemoteCallErrorConversionToOperationFailed) {
expectMoveChunkCommand(
chunk1,
kShardId1,
- false,
Status(ErrorCodes::NotMasterOrSecondary,
"RemoteCallErrorConversionToOperationFailedCheck generated error."));
@@ -1013,7 +785,6 @@ TEST_F(MigrationManagerTest, RemoteCallErrorConversionToOperationFailed) {
expectMoveChunkCommand(
chunk2,
kShardId3,
- false,
Status(ErrorCodes::ExceededTimeLimit,
"RemoteCallErrorConversionToOperationFailedCheck generated error."));