diff options
author | Andrew Shuvalov <andrew.shuvalov@mongodb.com> | 2021-06-14 17:34:11 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-06-14 17:47:31 +0000 |
commit | 593b212f742775144c921fe62b0f3ddfb7125e86 (patch) | |
tree | da395d6c6fffcf516258d56d08297aa15f9a2173 | |
parent | daf92dae320330eb7ff74a07f3855f0aa6da2baf (diff) | |
download | mongo-593b212f742775144c921fe62b0f3ddfb7125e86.tar.gz |
SERVER-57454: NotMaster error from chunk recipient is not propagated back to config server
-rw-r--r-- | src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp | 22 | ||||
-rw-r--r-- | src/mongo/db/s/migration_chunk_cloner_source_legacy_test.cpp | 34 | ||||
-rw-r--r-- | src/mongo/shell/shardingtest.js | 2 |
3 files changed, 55 insertions, 3 deletions
diff --git a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp index e02a0ee23e3..957cf076bf6 100644 --- a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp +++ b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp @@ -579,19 +579,39 @@ StatusWith<BSONObj> MigrationChunkClonerSourceLegacy::_callRecipient(const BSONO responseStatus = args.response; }); - // TODO: Update RemoteCommandTargeter on NotMaster errors. if (!scheduleStatus.isOK()) { return scheduleStatus.getStatus(); } executor->wait(scheduleStatus.getValue()); + auto checkNotMasterOrNetwork = [](const Status& status) { + return ErrorCodes::isNotMasterError(status.code()) || + ErrorCodes::isNetworkError(status.code()) || + status.code() == ErrorCodes::NetworkInterfaceExceededTimeLimit; + }; + if (!responseStatus.isOK()) { + if (checkNotMasterOrNetwork(responseStatus.status)) { + // Convert the error before it's returned to load balancer's MigrationManager + // to avoid it marking this host as having network issues. + warning() << "Migration chunk received error from " << _recipientHost << ": " + << responseStatus.status << ", converting to OperationFailed"; + responseStatus.status = + Status(ErrorCodes::OperationFailed, responseStatus.status.toString()); + } return responseStatus.status; } Status commandStatus = getStatusFromCommandResult(responseStatus.data); if (!commandStatus.isOK()) { + if (checkNotMasterOrNetwork(commandStatus)) { + // Convert the error before it's returned to load balancer's MigrationManager + // to avoid it marking this host as no longer primary. + warning() << "Migration chunk received error from " << _recipientHost << ": " + << commandStatus << ", converting to OperationFailed"; + commandStatus = Status(ErrorCodes::OperationFailed, commandStatus.toString()); + } return commandStatus; } diff --git a/src/mongo/db/s/migration_chunk_cloner_source_legacy_test.cpp b/src/mongo/db/s/migration_chunk_cloner_source_legacy_test.cpp index 07e2406313b..aafc5aac1ae 100644 --- a/src/mongo/db/s/migration_chunk_cloner_source_legacy_test.cpp +++ b/src/mongo/db/s/migration_chunk_cloner_source_legacy_test.cpp @@ -352,7 +352,9 @@ TEST_F(MigrationChunkClonerSourceLegacyTest, FailedToEngageRecipientShard) { }); auto startCloneStatus = cloner.startClone(operationContext()); - ASSERT_EQ(ErrorCodes::NetworkTimeout, startCloneStatus.code()); + // Error is converted to OperationFailed as if it's propagated to the MigrationManager + // it will mark the migration source as having network problems. + ASSERT_EQ(ErrorCodes::OperationFailed, startCloneStatus.code()); futureStartClone.timed_get(kFutureTimeout); } @@ -380,5 +382,35 @@ TEST_F(MigrationChunkClonerSourceLegacyTest, FailedToEngageRecipientShard) { cloner.cancelClone(operationContext()); } +TEST_F(MigrationChunkClonerSourceLegacyTest, RecipientShardFailsWithIsNotMaster) { + const std::vector<BSONObj> contents = {createCollectionDocument(99), + createCollectionDocument(100), + createCollectionDocument(199), + createCollectionDocument(200)}; + + createShardedCollection(contents); + + MigrationChunkClonerSourceLegacy cloner( + createMoveChunkRequest(ChunkRange(BSON("X" << 100), BSON("X" << 200))), + kShardKeyPattern, + kDonorConnStr, + kRecipientConnStr.getServers()[0]); + + { + auto futureStartClone = launchAsync([&]() { + onCommand([&](const RemoteCommandRequest& request) { + return Status(ErrorCodes::NotMaster, "Simulate failover at recipient"); + }); + }); + + auto startCloneStatus = cloner.startClone(operationContext()); + // Error is converted to OperationFailed as if it's propagated to the MigrationManager + // it will mark the migration source as no longer primary. + ASSERT_EQ(ErrorCodes::OperationFailed, startCloneStatus.code()); + futureStartClone.timed_get(kFutureTimeout); + } + cloner.cancelClone(operationContext()); +} + } // namespace } // namespace mongo diff --git a/src/mongo/shell/shardingtest.js b/src/mongo/shell/shardingtest.js index dcb46c30df4..d6c8a21c424 100644 --- a/src/mongo/shell/shardingtest.js +++ b/src/mongo/shell/shardingtest.js @@ -702,7 +702,7 @@ var ShardingTest = function(params) { } var result; - for (var i = 0; i < 5; i++) { + for (var i = 0; i < 10; i++) { var otherShard = this.getOther(this.getPrimaryShard(dbName)).name; result = this.s.adminCommand( {movechunk: c, find: move, to: otherShard, _waitForDelete: waitForDelete}); |