summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Shuvalov <andrew.shuvalov@mongodb.com>2021-06-14 17:34:11 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-06-14 17:47:31 +0000
commit593b212f742775144c921fe62b0f3ddfb7125e86 (patch)
treeda395d6c6fffcf516258d56d08297aa15f9a2173
parentdaf92dae320330eb7ff74a07f3855f0aa6da2baf (diff)
downloadmongo-593b212f742775144c921fe62b0f3ddfb7125e86.tar.gz
SERVER-57454: NotMaster error from chunk recipient is not propagated back to config server
-rw-r--r--src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp22
-rw-r--r--src/mongo/db/s/migration_chunk_cloner_source_legacy_test.cpp34
-rw-r--r--src/mongo/shell/shardingtest.js2
3 files changed, 55 insertions, 3 deletions
diff --git a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp
index e02a0ee23e3..957cf076bf6 100644
--- a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp
+++ b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp
@@ -579,19 +579,39 @@ StatusWith<BSONObj> MigrationChunkClonerSourceLegacy::_callRecipient(const BSONO
responseStatus = args.response;
});
- // TODO: Update RemoteCommandTargeter on NotMaster errors.
if (!scheduleStatus.isOK()) {
return scheduleStatus.getStatus();
}
executor->wait(scheduleStatus.getValue());
+ auto checkNotMasterOrNetwork = [](const Status& status) {
+ return ErrorCodes::isNotMasterError(status.code()) ||
+ ErrorCodes::isNetworkError(status.code()) ||
+ status.code() == ErrorCodes::NetworkInterfaceExceededTimeLimit;
+ };
+
if (!responseStatus.isOK()) {
+ if (checkNotMasterOrNetwork(responseStatus.status)) {
+ // Convert the error before it's returned to load balancer's MigrationManager
+ // to avoid it marking this host as having network issues.
+ warning() << "Migration chunk received error from " << _recipientHost << ": "
+ << responseStatus.status << ", converting to OperationFailed";
+ responseStatus.status =
+ Status(ErrorCodes::OperationFailed, responseStatus.status.toString());
+ }
return responseStatus.status;
}
Status commandStatus = getStatusFromCommandResult(responseStatus.data);
if (!commandStatus.isOK()) {
+ if (checkNotMasterOrNetwork(commandStatus)) {
+ // Convert the error before it's returned to load balancer's MigrationManager
+ // to avoid it marking this host as no longer primary.
+ warning() << "Migration chunk received error from " << _recipientHost << ": "
+ << commandStatus << ", converting to OperationFailed";
+ commandStatus = Status(ErrorCodes::OperationFailed, commandStatus.toString());
+ }
return commandStatus;
}
diff --git a/src/mongo/db/s/migration_chunk_cloner_source_legacy_test.cpp b/src/mongo/db/s/migration_chunk_cloner_source_legacy_test.cpp
index 07e2406313b..aafc5aac1ae 100644
--- a/src/mongo/db/s/migration_chunk_cloner_source_legacy_test.cpp
+++ b/src/mongo/db/s/migration_chunk_cloner_source_legacy_test.cpp
@@ -352,7 +352,9 @@ TEST_F(MigrationChunkClonerSourceLegacyTest, FailedToEngageRecipientShard) {
});
auto startCloneStatus = cloner.startClone(operationContext());
- ASSERT_EQ(ErrorCodes::NetworkTimeout, startCloneStatus.code());
+ // Error is converted to OperationFailed as if it's propagated to the MigrationManager
+ // it will mark the migration source as having network problems.
+ ASSERT_EQ(ErrorCodes::OperationFailed, startCloneStatus.code());
futureStartClone.timed_get(kFutureTimeout);
}
@@ -380,5 +382,35 @@ TEST_F(MigrationChunkClonerSourceLegacyTest, FailedToEngageRecipientShard) {
cloner.cancelClone(operationContext());
}
+TEST_F(MigrationChunkClonerSourceLegacyTest, RecipientShardFailsWithIsNotMaster) {
+ const std::vector<BSONObj> contents = {createCollectionDocument(99),
+ createCollectionDocument(100),
+ createCollectionDocument(199),
+ createCollectionDocument(200)};
+
+ createShardedCollection(contents);
+
+ MigrationChunkClonerSourceLegacy cloner(
+ createMoveChunkRequest(ChunkRange(BSON("X" << 100), BSON("X" << 200))),
+ kShardKeyPattern,
+ kDonorConnStr,
+ kRecipientConnStr.getServers()[0]);
+
+ {
+ auto futureStartClone = launchAsync([&]() {
+ onCommand([&](const RemoteCommandRequest& request) {
+ return Status(ErrorCodes::NotMaster, "Simulate failover at recipient");
+ });
+ });
+
+ auto startCloneStatus = cloner.startClone(operationContext());
+ // Error is converted to OperationFailed as if it's propagated to the MigrationManager
+ // it will mark the migration source as no longer primary.
+ ASSERT_EQ(ErrorCodes::OperationFailed, startCloneStatus.code());
+ futureStartClone.timed_get(kFutureTimeout);
+ }
+ cloner.cancelClone(operationContext());
+}
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/shell/shardingtest.js b/src/mongo/shell/shardingtest.js
index dcb46c30df4..d6c8a21c424 100644
--- a/src/mongo/shell/shardingtest.js
+++ b/src/mongo/shell/shardingtest.js
@@ -702,7 +702,7 @@ var ShardingTest = function(params) {
}
var result;
- for (var i = 0; i < 5; i++) {
+ for (var i = 0; i < 10; i++) {
var otherShard = this.getOther(this.getPrimaryShard(dbName)).name;
result = this.s.adminCommand(
{movechunk: c, find: move, to: otherShard, _waitForDelete: waitForDelete});