diff options
author | Siyuan Zhou <siyuan.zhou@mongodb.com> | 2016-08-16 12:05:35 -0400 |
---|---|---|
committer | Siyuan Zhou <siyuan.zhou@mongodb.com> | 2016-08-25 13:34:16 -0400 |
commit | 6b571fa314a9c5d193d362570bb58064d1d1fb0f (patch) | |
tree | 65fe8ecb710be879db8109bc3f92e4718efafdba | |
parent | 3eaf36bc9fb28f9ca63b0d7de33e9f587aa88325 (diff) | |
download | mongo-6b571fa314a9c5d193d362570bb58064d1d1fb0f.tar.gz |
SERVER-25126 Return a different error code if the step down occurs after the write
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | src/mongo/base/error_codes.err | 1 |
| -rw-r--r-- | src/mongo/client/remote_command_retry_scheduler.cpp | 1 |
| -rw-r--r-- | src/mongo/db/cloner.cpp | 6 |
| -rw-r--r-- | src/mongo/db/commands/mr.cpp | 8 |
| -rw-r--r-- | src/mongo/db/exec/update.cpp | 2 |
| -rw-r--r-- | src/mongo/db/ops/update.cpp | 2 |
| -rw-r--r-- | src/mongo/db/ops/write_ops_exec.cpp | 2 |
| -rw-r--r-- | src/mongo/db/query/get_executor.cpp | 4 |
| -rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl.cpp | 15 |
| -rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl_test.cpp | 4 |
| -rw-r--r-- | src/mongo/s/client/shard_remote.cpp | 3 |
| -rw-r--r-- | src/mongo/s/write_ops/batch_downconvert.cpp | 2 |
12 files changed, 26 insertions, 24 deletions
diff --git a/src/mongo/base/error_codes.err b/src/mongo/base/error_codes.err index 59a8c6acaaa..2ffffeae5c5 100644 --- a/src/mongo/base/error_codes.err +++ b/src/mongo/base/error_codes.err @@ -187,6 +187,7 @@ error_code("ClientMetadataDocumentTooLarge", 185) error_code("ClientMetadataCannotBeMutated", 186) error_code("LinearizableReadConcernError", 187) error_code("IncompatibleServerVersion", 188) +error_code("PrimarySteppedDown", 189) # Non-sequential error codes (for compatibility only) error_code("SocketException", 9001) diff --git a/src/mongo/client/remote_command_retry_scheduler.cpp b/src/mongo/client/remote_command_retry_scheduler.cpp index ded4a10d964..f72874a33a4 100644 --- a/src/mongo/client/remote_command_retry_scheduler.cpp +++ b/src/mongo/client/remote_command_retry_scheduler.cpp @@ -95,6 +95,7 @@ const std::initializer_list<ErrorCodes::Error> RemoteCommandRetryScheduler::kAll ErrorCodes::HostUnreachable, ErrorCodes::HostNotFound, ErrorCodes::NetworkTimeout, + ErrorCodes::PrimarySteppedDown, ErrorCodes::InterruptedDueToReplStateChange}; std::unique_ptr<RemoteCommandRetryScheduler::RetryPolicy> diff --git a/src/mongo/db/cloner.cpp b/src/mongo/db/cloner.cpp index a9e9d763871..3ea687fdd5d 100644 --- a/src/mongo/db/cloner.cpp +++ b/src/mongo/db/cloner.cpp @@ -279,7 +279,7 @@ void Cloner::copy(OperationContext* txn, options); } - uassert(ErrorCodes::NotMaster, + uassert(ErrorCodes::PrimarySteppedDown, str::stream() << "Not primary while cloning collection " << from_collection.ns() << " to " << to_collection.ns() @@ -311,7 +311,7 @@ void Cloner::copyIndexes(OperationContext* txn, } } - uassert(ErrorCodes::NotMaster, + uassert(ErrorCodes::PrimarySteppedDown, str::stream() << "Not primary while copying indexes from " << from_collection.ns() << " to " << to_collection.ns() @@ -382,7 +382,7 @@ bool Cloner::copyCollection(OperationContext* txn, ScopedTransaction transaction(txn, MODE_IX); Lock::DBLock dbWrite(txn->lockState(), dbname, MODE_X); - 
uassert(ErrorCodes::NotMaster, + uassert(ErrorCodes::PrimarySteppedDown, str::stream() << "Not primary while copying collection " << ns << " (Cloner)", !txn->writesAreReplicated() || repl::getGlobalReplicationCoordinator()->canAcceptWritesFor(nss)); diff --git a/src/mongo/db/commands/mr.cpp b/src/mongo/db/commands/mr.cpp index f25289baecd..26b40571fd0 100644 --- a/src/mongo/db/commands/mr.cpp +++ b/src/mongo/db/commands/mr.cpp @@ -465,8 +465,8 @@ void State::prepTempCollection() { OldClientWriteContext tempCtx(_txn, _config.tempNamespace); WriteUnitOfWork wuow(_txn); NamespaceString tempNss(_config.tempNamespace); - uassert(ErrorCodes::NotMaster, - "no longer master", + uassert(ErrorCodes::PrimarySteppedDown, + "no longer primary", repl::getGlobalReplicationCoordinator()->canAcceptWritesFor(tempNss)); Collection* tempColl = tempCtx.getCollection(); invariant(!tempColl); @@ -711,8 +711,8 @@ void State::insert(const string& ns, const BSONObj& o) { OldClientWriteContext ctx(_txn, ns); WriteUnitOfWork wuow(_txn); NamespaceString nss(ns); - uassert(ErrorCodes::NotMaster, - "no longer master", + uassert(ErrorCodes::PrimarySteppedDown, + "no longer primary", repl::getGlobalReplicationCoordinator()->canAcceptWritesFor(nss)); Collection* coll = getCollectionOrUassert(ctx.db(), ns); diff --git a/src/mongo/db/exec/update.cpp b/src/mongo/db/exec/update.cpp index 72c84ac6f7e..5041ff1ceb3 100644 --- a/src/mongo/db/exec/update.cpp +++ b/src/mongo/db/exec/update.cpp @@ -1010,7 +1010,7 @@ Status UpdateStage::restoreUpdateState() { !repl::getGlobalReplicationCoordinator()->canAcceptWritesFor(nsString); if (userInitiatedWritesAndNotPrimary) { - return Status(ErrorCodes::NotMaster, + return Status(ErrorCodes::PrimarySteppedDown, str::stream() << "Demoted from primary while performing update on " << nsString.ns()); } diff --git a/src/mongo/db/ops/update.cpp b/src/mongo/db/ops/update.cpp index c808d9aab34..5e0763f9eac 100644 --- a/src/mongo/db/ops/update.cpp +++ 
b/src/mongo/db/ops/update.cpp @@ -94,7 +94,7 @@ UpdateResult update(OperationContext* txn, Database* db, const UpdateRequest& re !repl::getGlobalReplicationCoordinator()->canAcceptWritesFor(nsString); if (userInitiatedWritesAndNotPrimary) { - uassertStatusOK(Status(ErrorCodes::NotMaster, + uassertStatusOK(Status(ErrorCodes::PrimarySteppedDown, str::stream() << "Not primary while creating collection " << nsString.ns() << " during upsert")); diff --git a/src/mongo/db/ops/write_ops_exec.cpp b/src/mongo/db/ops/write_ops_exec.cpp index 95b73b51873..52db4a5674a 100644 --- a/src/mongo/db/ops/write_ops_exec.cpp +++ b/src/mongo/db/ops/write_ops_exec.cpp @@ -167,7 +167,7 @@ private: }; void assertCanWrite_inlock(OperationContext* txn, const NamespaceString& ns) { - uassert(ErrorCodes::NotMaster, + uassert(ErrorCodes::PrimarySteppedDown, str::stream() << "Not primary while writing to " << ns.ns(), repl::ReplicationCoordinator::get(txn->getServiceContext())->canAcceptWritesFor(ns)); CollectionShardingState::get(txn, ns)->checkShardVersionOrThrow(txn); diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp index 4742f915d0d..71f0753aa0a 100644 --- a/src/mongo/db/query/get_executor.cpp +++ b/src/mongo/db/query/get_executor.cpp @@ -704,7 +704,7 @@ StatusWith<unique_ptr<PlanExecutor>> getExecutorDelete(OperationContext* txn, !repl::getGlobalReplicationCoordinator()->canAcceptWritesFor(nss); if (userInitiatedWritesAndNotPrimary) { - return Status(ErrorCodes::NotMaster, + return Status(ErrorCodes::PrimarySteppedDown, str::stream() << "Not primary while removing from " << nss.ns()); } @@ -870,7 +870,7 @@ StatusWith<unique_ptr<PlanExecutor>> getExecutorUpdate(OperationContext* txn, !repl::getGlobalReplicationCoordinator()->canAcceptWritesFor(nsString); if (userInitiatedWritesAndNotPrimary) { - return Status(ErrorCodes::NotMaster, + return Status(ErrorCodes::PrimarySteppedDown, str::stream() << "Not primary while performing update on " << nsString.ns()); 
} diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index 6f4c438b255..acf4179bff8 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -1625,9 +1625,9 @@ ReplicationCoordinator::StatusAndDuration ReplicationCoordinatorImpl::_awaitRepl } if (replMode == modeReplSet && !_memberState.primary()) { - return StatusAndDuration( - Status(ErrorCodes::NotMaster, "Not master while waiting for replication"), - Milliseconds(timer->millis())); + return StatusAndDuration(Status(ErrorCodes::PrimarySteppedDown, + "Primary stepped down while waiting for replication"), + Milliseconds(timer->millis())); } if (writeConcern.wMode.empty()) { @@ -1651,11 +1651,10 @@ ReplicationCoordinator::StatusAndDuration ReplicationCoordinatorImpl::_awaitRepl } if (replMode == modeReplSet && !_getMemberState_inlock().primary()) { - return StatusAndDuration(Status(ErrorCodes::NotMaster, - "Not master anymore while waiting for replication" - " - this most likely means that a step down" - " occurred while waiting for replication"), - elapsed); + return StatusAndDuration( + Status(ErrorCodes::PrimarySteppedDown, + "Not primary anymore while waiting for replication - primary stepped down"), + elapsed); } if (writeConcern.wTimeout != WriteConcernOptions::kNoTimeout && diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp index bd3267cb172..659096ed571 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp @@ -655,7 +655,7 @@ TEST_F(ReplCoordTest, NodeReturnsNotMasterWhenRunningAwaitReplicationAgainstASec // Node should fail to awaitReplication when not primary. 
ReplicationCoordinator::StatusAndDuration statusAndDur = getReplCoord()->awaitReplication(txn.get(), time, writeConcern); - ASSERT_EQUALS(ErrorCodes::NotMaster, statusAndDur.status); + ASSERT_EQUALS(ErrorCodes::PrimarySteppedDown, statusAndDur.status); } TEST_F(ReplCoordTest, NodeReturnsOkWhenRunningAwaitReplicationAgainstPrimaryWithWZero) { @@ -1321,7 +1321,7 @@ TEST_F(ReplCoordTest, NodeReturnsNotMasterWhenSteppingDownBeforeSatisfyingAWrite ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(2, 2, time1)); getReplCoord()->stepDown(txn.get(), true, Milliseconds(0), Milliseconds(1000)); ReplicationCoordinator::StatusAndDuration statusAndDur = awaiter.getResult(); - ASSERT_EQUALS(ErrorCodes::NotMaster, statusAndDur.status); + ASSERT_EQUALS(ErrorCodes::PrimarySteppedDown, statusAndDur.status); awaiter.reset(); } diff --git a/src/mongo/s/client/shard_remote.cpp b/src/mongo/s/client/shard_remote.cpp index 006647b748c..6ae61ab1e19 100644 --- a/src/mongo/s/client/shard_remote.cpp +++ b/src/mongo/s/client/shard_remote.cpp @@ -137,7 +137,8 @@ void ShardRemote::updateReplSetMonitor(const HostAndPort& remoteHost, return; if (ErrorCodes::isNotMasterError(remoteCommandStatus.code()) || - (remoteCommandStatus == ErrorCodes::InterruptedDueToReplStateChange)) { + (remoteCommandStatus == ErrorCodes::InterruptedDueToReplStateChange) || + (remoteCommandStatus == ErrorCodes::PrimarySteppedDown)) { _targeter->markHostNotMaster(remoteHost); } else if (ErrorCodes::isNetworkError(remoteCommandStatus.code())) { _targeter->markHostUnreachable(remoteHost); diff --git a/src/mongo/s/write_ops/batch_downconvert.cpp b/src/mongo/s/write_ops/batch_downconvert.cpp index 1ece78ecdd5..2d792351b60 100644 --- a/src/mongo/s/write_ops/batch_downconvert.cpp +++ b/src/mongo/s/write_ops/batch_downconvert.cpp @@ -90,7 +90,7 @@ Status extractGLEErrors(const BSONObj& gleResponse, GLEErrors* errors) { // 2.6 Error codes || code == ErrorCodes::NotMaster || code == ErrorCodes::UnknownReplWriteConcern || - 
code == ErrorCodes::WriteConcernFailed) { + code == ErrorCodes::WriteConcernFailed || code == ErrorCodes::PrimarySteppedDown) { // Write concern errors that get returned as regular errors (result may not be ok: 1.0) errors->wcError.reset(new WriteConcernErrorDetail()); errors->wcError->setErrCode(ErrorCodes::fromInt(code)); |