diff options
Diffstat (limited to 'src/mongo/executor')
-rw-r--r-- | src/mongo/executor/SConscript | 11 | ||||
-rw-r--r-- | src/mongo/executor/hedged_remote_command_runner.h | 43 | ||||
-rw-r--r-- | src/mongo/executor/hedged_remote_command_runner_test.cpp | 79 | ||||
-rw-r--r-- | src/mongo/executor/network_interface_mock.cpp | 31 | ||||
-rw-r--r-- | src/mongo/executor/network_interface_tl.cpp | 17 | ||||
-rw-r--r-- | src/mongo/executor/remote_command_runner.cpp | 27 | ||||
-rw-r--r-- | src/mongo/executor/remote_command_runner_error_info.cpp | 50 | ||||
-rw-r--r-- | src/mongo/executor/remote_command_runner_error_info.h | 151 | ||||
-rw-r--r-- | src/mongo/executor/remote_command_runner_test.cpp | 67 |
9 files changed, 396 insertions, 80 deletions
diff --git a/src/mongo/executor/SConscript b/src/mongo/executor/SConscript index 815159efd70..d0f2301cd21 100644 --- a/src/mongo/executor/SConscript +++ b/src/mongo/executor/SConscript @@ -246,6 +246,16 @@ env.Library( ) env.Library( + target='remote_command_runner_error_info', + source=[ + 'remote_command_runner_error_info.cpp', + ], + LIBDEPS=[ + '$BUILD_DIR/mongo/base', + ], +) + +env.Library( target='remote_command_runner', source=[ 'remote_command_runner.cpp', @@ -255,6 +265,7 @@ env.Library( '$BUILD_DIR/mongo/db/service_context_test_fixture', '$BUILD_DIR/mongo/rpc/command_status', 'remote_command', + 'remote_command_runner_error_info', 'task_executor_interface', ], ) diff --git a/src/mongo/executor/hedged_remote_command_runner.h b/src/mongo/executor/hedged_remote_command_runner.h index 66f4a51d8d1..8f72312c2c7 100644 --- a/src/mongo/executor/hedged_remote_command_runner.h +++ b/src/mongo/executor/hedged_remote_command_runner.h @@ -37,9 +37,11 @@ #include "mongo/db/query/cursor_response.h" #include "mongo/executor/remote_command_response.h" #include "mongo/executor/remote_command_runner.h" +#include "mongo/executor/remote_command_runner_error_info.h" #include "mongo/executor/remote_command_targeter.h" #include "mongo/executor/task_executor.h" #include "mongo/rpc/get_status_from_command_result.h" +#include "mongo/s/hedge_options_util.h" #include "mongo/s/mongos_server_parameters_gen.h" #include "mongo/util/assert_util.h" #include "mongo/util/cancellation.h" @@ -155,27 +157,34 @@ SemiFuture<RemoteCommandRunnerResponse<typename CommandType::Reply>> doHedgedReq * the future with index 0, which we treat as the "authoritative" request. This is the * codepath followed when we are not hedging or there is only 1 target provided. */ - return whenAnyThat(std::move(requests), - [](StatusWith<SingleResponse> response, size_t index) { - Status commandStatus = response.getStatus(); - if (index == 0) { - return true; - } - - if (commandStatus == ErrorCodes::MaxTimeMSExpired || - commandStatus == ErrorCodes::StaleDbVersion || - ErrorCodes::isStaleShardVersionError(commandStatus)) { - return false; - } - return true; - }); + return whenAnyThat( + std::move(requests), [](StatusWith<SingleResponse> response, size_t index) { + Status commandStatus = response.getStatus(); + + if (index == 0) { + return true; + } + if (commandStatus.code() == Status::OK()) { + return true; + } + + // TODO SERVER-69592 Account for interior executor shutdown + invariant(commandStatus.code() == ErrorCodes::RemoteCommandExecutionError); + boost::optional<Status> remoteErr; + auto extraInfo = commandStatus.extraInfo<RemoteCommandExecutionErrorInfo>(); + if (extraInfo->isRemote()) { + remoteErr = extraInfo->asRemote().getRemoteCommandResult(); + } + + if (remoteErr && isIgnorableAsHedgeResult(*remoteErr)) { + return false; + } + return true; + }); }) .onCompletion([hedgeCancellationToken](StatusWith<SingleResponse> result) mutable { // TODO SERVER-68101 add retry logic - // TODO SERVER-68555 add extra error handling info - hedgeCancellationToken.cancel(); - return result; }) .semi(); diff --git a/src/mongo/executor/hedged_remote_command_runner_test.cpp b/src/mongo/executor/hedged_remote_command_runner_test.cpp index 3daeded83f6..b879d5a2a02 100644 --- a/src/mongo/executor/hedged_remote_command_runner_test.cpp +++ b/src/mongo/executor/hedged_remote_command_runner_test.cpp @@ -141,6 +141,11 @@ TEST_F(HedgedCommandRunnerTest, FindHedgeRequestTwoHosts) { RemoteCommandRunnerResponse<CursorInitialReply> res = resultFuture.get(); + auto network = getNetworkInterfaceMock(); + network->enterNetwork(); + network->runReadyNetworkOperations(); + network->exitNetwork(); + auto counters = getNetworkInterfaceCounters(); ASSERT_EQ(counters.succeeded, 1); ASSERT_EQ(counters.canceled, 1); @@ -171,6 +176,11 @@ TEST_F(HedgedCommandRunnerTest, FindHedgeRequestThreeHosts) { RemoteCommandRunnerResponse<CursorInitialReply> res = resultFuture.get(); + auto network = getNetworkInterfaceMock(); + network->enterNetwork(); + network->runReadyNetworkOperations(); + network->exitNetwork(); + auto counters = getNetworkInterfaceCounters(); ASSERT_EQ(counters.succeeded, 1); ASSERT_EQ(counters.canceled, 2); @@ -263,11 +273,16 @@ TEST_F(HedgedCommandRunnerTest, FirstCommandFailsWithSignificantError) { return Status(ErrorCodes::NetworkTimeout, "mock"); }); + auto network = getNetworkInterfaceMock(); + network->enterNetwork(); + network->runReadyNetworkOperations(); + network->exitNetwork(); + auto counters = getNetworkInterfaceCounters(); ASSERT_EQ(counters.failed, 1); ASSERT_EQ(counters.canceled, 1); - ASSERT_THROWS_CODE(resultFuture.get(), DBException, ErrorCodes::NetworkTimeout); + ASSERT_THROWS_CODE(resultFuture.get(), DBException, ErrorCodes::RemoteCommandExecutionError); } /** @@ -292,19 +307,19 @@ TEST_F(HedgedCommandRunnerTest, BothCommandsFailWithSkippableError) { onCommand([&](const auto& request) { ASSERT(request.cmdObj["find"]); - return Status(ErrorCodes::MaxTimeMSExpired, "mock"); + return createErrorResponse(Status(ErrorCodes::MaxTimeMSExpired, "mock")); }); onCommand([&](const auto& request) { ASSERT(request.cmdObj["find"]); - return Status(ErrorCodes::MaxTimeMSExpired, "mock"); + return createErrorResponse(Status(ErrorCodes::MaxTimeMSExpired, "mock")); }); auto counters = getNetworkInterfaceCounters(); - ASSERT_EQ(counters.failed, 2); + ASSERT_EQ(counters.sent, 2); ASSERT_EQ(counters.canceled, 0); - ASSERT_THROWS_CODE(resultFuture.get(), DBException, ErrorCodes::MaxTimeMSExpired); + ASSERT_THROWS_CODE(resultFuture.get(), DBException, ErrorCodes::RemoteCommandExecutionError); } TEST_F(HedgedCommandRunnerTest, AllCommandsFailWithSkippableError) { @@ -324,7 +339,7 @@ TEST_F(HedgedCommandRunnerTest, AllCommandsFailWithSkippableError) { CancellationToken::uncancelable()); auto network = getNetworkInterfaceMock(); - auto now = getNetworkInterfaceMock()->now(); + auto now = network->now(); network->enterNetwork(); NetworkInterfaceMock::NetworkOperationIterator noi1 = network->getNextReadyRequest(); @@ -335,25 +350,42 @@ TEST_F(HedgedCommandRunnerTest, AllCommandsFailWithSkippableError) { auto secondRequest = (*noi2).getRequestOnAny(); auto thirdRequest = (*noi3).getRequestOnAny(); + auto remoteMaxTimeMSError = RemoteCommandResponse{ + createErrorResponse(Status(ErrorCodes::MaxTimeMSExpired, "mock")), Milliseconds(1)}; if (firstRequest.target[0] == kThreeHosts[0]) { - network->scheduleErrorResponse( - noi1, now + Milliseconds(1000), Status(ErrorCodes::MaxTimeMSExpired, "mock")); + network->scheduleResponse( + noi1, + now + Milliseconds(1000), + {createErrorResponse(Status(ErrorCodes::MaxTimeMSExpired, "mock")), Milliseconds(1)}); } else { - network->scheduleErrorResponse(noi1, Status(ErrorCodes::MaxTimeMSExpired, "mock")); + network->scheduleResponse( + noi1, + now, + {createErrorResponse(Status(ErrorCodes::MaxTimeMSExpired, "mock")), Milliseconds(1)}); } if (secondRequest.target[0] == kThreeHosts[0]) { - network->scheduleErrorResponse( - noi2, now + Milliseconds(1000), Status(ErrorCodes::MaxTimeMSExpired, "mock")); + network->scheduleResponse( + noi2, + now + Milliseconds(1000), + {createErrorResponse(Status(ErrorCodes::MaxTimeMSExpired, "mock")), Milliseconds(1)}); } else { - network->scheduleErrorResponse(noi2, Status(ErrorCodes::MaxTimeMSExpired, "mock")); + network->scheduleResponse( + noi2, + now, + {createErrorResponse(Status(ErrorCodes::MaxTimeMSExpired, "mock")), Milliseconds(1)}); } if (thirdRequest.target[0] == kThreeHosts[0]) { - network->scheduleErrorResponse( - noi3, now + Milliseconds(1000), Status(ErrorCodes::MaxTimeMSExpired, "mock")); + network->scheduleResponse( + noi3, + now + Milliseconds(1000), + {createErrorResponse(Status(ErrorCodes::MaxTimeMSExpired, "mock")), Milliseconds(1)}); } else { - network->scheduleErrorResponse(noi3, Status(ErrorCodes::MaxTimeMSExpired, "mock")); + network->scheduleResponse( + noi3, + now, + {createErrorResponse(Status(ErrorCodes::MaxTimeMSExpired, "mock")), Milliseconds(1)}); } network->runUntil(now + Milliseconds(1500)); @@ -361,10 +393,10 @@ TEST_F(HedgedCommandRunnerTest, AllCommandsFailWithSkippableError) { auto counters = network->getCounters(); network->exitNetwork(); - ASSERT_EQ(counters.failed, 3); + ASSERT_EQ(counters.succeeded, 3); ASSERT_EQ(counters.canceled, 0); - ASSERT_THROWS_CODE(resultFuture.get(), DBException, ErrorCodes::MaxTimeMSExpired); + ASSERT_THROWS_CODE(resultFuture.get(), DBException, ErrorCodes::RemoteCommandExecutionError); } /** @@ -407,7 +439,10 @@ TEST_F(HedgedCommandRunnerTest, FirstCommandFailsWithSkippableErrorNextSucceeds) if (firstRequest.target[0] == kThreeHosts[0]) { network->scheduleSuccessfulResponse(noi1, now + Milliseconds(1000), successResponse); } else { - network->scheduleErrorResponse(noi1, Status(ErrorCodes::MaxTimeMSExpired, "mock")); + network->scheduleResponse( + noi1, + now, + {createErrorResponse(Status(ErrorCodes::MaxTimeMSExpired, "mock")), Milliseconds(1)}); } // if the second request is the authoritative one, send a delayed success response @@ -415,7 +450,10 @@ TEST_F(HedgedCommandRunnerTest, FirstCommandFailsWithSkippableErrorNextSucceeds) if (secondRequest.target[0] == kThreeHosts[0]) { network->scheduleSuccessfulResponse(noi2, now + Milliseconds(1000), successResponse); } else { - network->scheduleErrorResponse(noi2, Status(ErrorCodes::MaxTimeMSExpired, "mock")); + network->scheduleResponse( + noi2, + now, + {createErrorResponse(Status(ErrorCodes::MaxTimeMSExpired, "mock")), Milliseconds(1)}); } network->runUntil(now + Milliseconds(1500)); @@ -423,8 +461,7 @@ TEST_F(HedgedCommandRunnerTest, FirstCommandFailsWithSkippableErrorNextSucceeds) auto counters = network->getCounters(); network->exitNetwork(); - ASSERT_EQ(counters.failed, 1); - ASSERT_EQ(counters.succeeded, 1); + ASSERT_EQ(counters.succeeded, 2); ASSERT_EQ(counters.canceled, 0); ASSERT_EQ(resultFuture.get().response.getCursor()->getNs(), diff --git a/src/mongo/executor/network_interface_mock.cpp b/src/mongo/executor/network_interface_mock.cpp index 454e7be6fa1..962fb43851d 100644 --- a/src/mongo/executor/network_interface_mock.cpp +++ b/src/mongo/executor/network_interface_mock.cpp @@ -160,7 +160,6 @@ void NetworkInterfaceMock::_interruptWithResponse_inlock(const CallbackHandle& c if (!noi->isFinished()) { // We've effectively observed the NetworkOperation. noi->markAsProcessing(); - _counters.canceled++; _scheduleResponse_inlock(noi, _now_inlock(), response); } } @@ -604,13 +603,6 @@ void NetworkInterfaceMock::_runReadyNetworkOperations_inlock(stdx::unique_lock<s "request"_attr = noi->getRequest(), "response"_attr = response.response); - _counters.sent++; - if (response.response.status.isOK()) { - _counters.succeeded++; - } else { - _counters.failed++; - } - if (_metadataHook) { _metadataHook ->readReplyMetadata(noi->getRequest().opCtx, @@ -619,8 +611,27 @@ void NetworkInterfaceMock::_runReadyNetworkOperations_inlock(stdx::unique_lock<s .transitional_ignore(); } - noi->processResponse(std::move(response)); - + // The NetworkInterface can recieve multiple responses for a particular request (e.g. + // cancellation and a 'true' scheduled response). But each request can only have one logical + // response. This choice of the one logical response is mediated by the _isFinished field of + // the NetworkOperation; whichever response sets this first via + // NetworkOperation::processResponse wins. NetworkOperation::processResponse returns `true` + // if the given response was accepted by the NetworkOperation as its sole logical response. + // + // We care about this here because we only want to increment the counters for operations + // succeeded/failed for the responses that are actually used, + Status localResponseStatus = response.response.status; + bool noiUsedThisResponse = noi->processResponse(std::move(response)); + if (noiUsedThisResponse) { + _counters.sent++; + if (localResponseStatus.isOK()) { + _counters.succeeded++; + } else if (ErrorCodes::isCancellationError(localResponseStatus)) { + _counters.canceled++; + } else { + _counters.failed++; + } + } lk->lock(); } invariant(_currentlyRunning == kNetworkThread); diff --git a/src/mongo/executor/network_interface_tl.cpp b/src/mongo/executor/network_interface_tl.cpp index f6979f18462..659278741b1 100644 --- a/src/mongo/executor/network_interface_tl.cpp +++ b/src/mongo/executor/network_interface_tl.cpp @@ -41,6 +41,7 @@ #include "mongo/executor/network_interface_tl_gen.h" #include "mongo/logv2/log.h" #include "mongo/rpc/get_status_from_command_result.h" +#include "mongo/s/hedge_options_util.h" #include "mongo/transport/transport_layer_manager.h" #include "mongo/util/concurrency/idle_thread_block.h" #include "mongo/util/net/socket_utils.h" @@ -102,19 +103,6 @@ bool catchingInvoke(F&& f, EH&& eh, StringData hint) { } } -/** - * We ignore a subset of errors that may occur while running hedged operations (e.g., maxTimeMS - * expiration), as the operation may safely succeed despite their failure. For example, a network - * timeout error indicates the remote host experienced a timeout while running a remote-command as - * part of executing the hedged operation. This is by no means an indication that the operation has - * failed, as other hedged operations may still succeed. - * TODO SERVER-68704 will include other error categories that are safe to ignore. - */ -bool skipHedgeResult(const Status& status) { - return status == ErrorCodes::MaxTimeMSExpired || status == ErrorCodes::StaleDbVersion || - ErrorCodes::isNetworkTimeoutError(status) || ErrorCodes::isStaleShardVersionError(status); -} - template <typename IA, typename IB, typename F> int compareTransformed(IA a1, IA a2, IB b1, IB b2, F&& f) { for (;; ++a1, ++b1) @@ -125,7 +113,6 @@ int compareTransformed(IA a1, IA a2, IB b1, IB b2, F&& f) { else if (int r = f(*a1) - f(*b1)) return r; } - } // namespace /** @@ -949,7 +936,7 @@ void NetworkInterfaceTL::RequestState::resolve(Future<RemoteCommandResponse> fut returnConnection(status); const auto commandStatus = getStatusFromCommandResult(response.data); - if (isHedge && skipHedgeResult(commandStatus)) { + if (isHedge && isIgnorableAsHedgeResult(commandStatus)) { LOGV2_DEBUG(4660701, 2, "Hedged request returned status", diff --git a/src/mongo/executor/remote_command_runner.cpp b/src/mongo/executor/remote_command_runner.cpp index 7c7c9280014..6b909689094 100644 --- a/src/mongo/executor/remote_command_runner.cpp +++ b/src/mongo/executor/remote_command_runner.cpp @@ -30,6 +30,7 @@ #include "mongo/executor/remote_command_runner.h" #include "mongo/base/error_codes.h" #include "mongo/executor/remote_command_request.h" +#include "mongo/executor/remote_command_runner_error_info.h" #include "mongo/util/assert_util.h" #include "mongo/util/future.h" #include "mongo/util/net/hostandport.h" @@ -38,6 +39,14 @@ namespace mongo::executor::remote_command_runner { namespace detail { namespace { +Status makeErrorIfNeeded(TaskExecutor::ResponseOnAnyStatus r) { + if (r.status.isOK() && getStatusFromCommandResult(r.data).isOK() && + getWriteConcernStatusFromCommandResult(r.data).isOK() && + getFirstWriteErrorStatusFromCommandResult(r.data).isOK()) { + return Status::OK(); + } + return {RemoteCommandExecutionErrorInfo(r), "Remote command execution failed"}; +} const auto getRCRImpl = ServiceContext::declareDecoration<std::unique_ptr<RemoteCommandRunner>>(); } // namespace @@ -65,14 +74,16 @@ public: targets, dbName.toString(), cmdBSON, rpc::makeEmptyMetadata(), opCtx); return exec->scheduleRemoteCommandOnAny(executorRequest, token); }) - .then([&, exec = std::move(exec)](RemoteCommandOnAnyResponse r) { - // Ensure the command didn't have a local error, or any remote errors, preferring - // to propogate ok: 0 errors over writeConcern errors over write errors. - iassert(r.status); - iassert(getStatusFromCommandResult(r.data)); - iassert(getWriteConcernStatusFromCommandResult(r.data)); - iassert(getFirstWriteErrorStatusFromCommandResult(r.data)); - + .onError([](Status s) -> StatusWith<TaskExecutor::ResponseOnAnyStatus> { + // If there was a scheduling error or other local error before the + // command was accepted by the executor. + return Status{RemoteCommandExecutionErrorInfo(s), + "Remote command execution failed"}; + }) + .then([](TaskExecutor::ResponseOnAnyStatus r) { + // TODO SERVER-69592 account for interior executor shutdown + auto s = makeErrorIfNeeded(r); + uassertStatusOK(s); return RemoteCommandInternalResponse{r.data, r.target.get()}; }); } diff --git a/src/mongo/executor/remote_command_runner_error_info.cpp b/src/mongo/executor/remote_command_runner_error_info.cpp new file mode 100644 index 00000000000..04a34cec8bb --- /dev/null +++ b/src/mongo/executor/remote_command_runner_error_info.cpp @@ -0,0 +1,50 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + + +#include "mongo/executor/remote_command_runner_error_info.h" +#include "mongo/base/init.h" +#include "mongo/bson/bsonobjbuilder.h" + +namespace mongo { +namespace { + +MONGO_INIT_REGISTER_ERROR_EXTRA_INFO(RemoteCommandExecutionErrorInfo); + +} // namespace + +std::shared_ptr<const ErrorExtraInfo> RemoteCommandExecutionErrorInfo::parse(const BSONObj& obj) { + MONGO_UNREACHABLE; +} + +void RemoteCommandExecutionErrorInfo::serialize(BSONObjBuilder* bob) const { + MONGO_UNREACHABLE; +} + +} // namespace mongo diff --git a/src/mongo/executor/remote_command_runner_error_info.h b/src/mongo/executor/remote_command_runner_error_info.h new file mode 100644 index 00000000000..b51936442cc --- /dev/null +++ b/src/mongo/executor/remote_command_runner_error_info.h @@ -0,0 +1,151 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/base/error_extra_info.h" +#include "mongo/bson/bsonobj.h" +#include "mongo/executor/remote_command_response.h" +#include "mongo/rpc/get_status_from_command_result.h" +#include "mongo/stdx/variant.h" + +namespace mongo { +class BSONObjBuilder; + +enum class CommandErrorProvenance { kLocal, kRemote }; + + +/** + * Contains information to augment the 'RemoteCommandExecutionError' error code. In particular, this + * class holds the provenance and data of the underlying error(s). + */ +class RemoteCommandExecutionErrorInfo final : public ErrorExtraInfo { +public: + /** + * Nested class used to describe remote errors. + */ + class RemoteError { + public: + // The BSON recieved over-the-wire that encodes the remote + // error is stored and used to construct this in-memory representation. + explicit RemoteError(BSONObj obj) + : _error{obj}, + _remoteCommandResult{getStatusFromCommandResult(_error)}, + _remoteCommandWriteConcernError{getWriteConcernStatusFromCommandResult(_error)}, + _remoteCommandFirstWriteError{getFirstWriteErrorStatusFromCommandResult(_error)} { + // The buffer backing the default empty BSONObj has static duration so it is effectively + // owned. + invariant(_error.isOwned() || _error.objdata() == BSONObj().objdata()); + if (BSONElement errLabelsElem = _error["errorLabels"]; !errLabelsElem.eoo()) { + _errLabels = errLabelsElem.Array(); + } + } + + Status getRemoteCommandResult() const { + return _remoteCommandResult; + } + + Status getRemoteCommandWriteConcernError() const { + return _remoteCommandWriteConcernError; + } + + Status getRemoteCommandFirstWriteError() const { + return _remoteCommandFirstWriteError; + } + + BSONObj getResponseObj() const { + return _error; + } + + std::vector<BSONElement> getErrorLabels() const { + return _errLabels; + } + + private: + const BSONObj _error; + Status _remoteCommandResult; + Status _remoteCommandWriteConcernError; + Status _remoteCommandFirstWriteError; + std::vector<BSONElement> _errLabels; + }; + + // Required member of every ErrorExtraInfo. + static constexpr auto code = ErrorCodes::RemoteCommandExecutionError; + + // Unused and marked unreachable - the RemoteCommandExecutionError is InternalOnly and should + // never be encoded in a BSONObj / recieved or sent over-the-wire. + static std::shared_ptr<const ErrorExtraInfo> parse(const BSONObj& obj); + + /** + * Construct the relevant extra info from the RemoteCommandResponse provided by the TaskExecutor + * used to invoke the remote command. + */ + explicit RemoteCommandExecutionErrorInfo(executor::RemoteCommandOnAnyResponse rcr) + : _error{RemoteError(rcr.data)} { + if (!rcr.status.isOK()) { + _prov = CommandErrorProvenance::kLocal; + _error = rcr.status; + return; + } + _prov = CommandErrorProvenance::kRemote; + } + + /** + * Construct the relevant extra info from an error status - used if a remote command invokation + * attempt fails before it reaches the TaskExecutor level. + */ + explicit RemoteCommandExecutionErrorInfo(Status s) + : _prov{CommandErrorProvenance::kLocal}, _error{s} {} + + bool isLocal() const { + return _prov == CommandErrorProvenance::kLocal; + } + + bool isRemote() const { + return _prov == CommandErrorProvenance::kRemote; + } + + const RemoteError& asRemote() const { + return stdx::get<const RemoteError>(_error); + } + + Status asLocal() const { + return stdx::get<Status>(_error); + } + + // Unused and marked unreachable - the RemoteCommandExecutionError is InternalOnly and should + // never be encoded in a BSONObj / recieved or sent over-the-wire. + void serialize(BSONObjBuilder* bob) const final; + +private: + CommandErrorProvenance _prov; + stdx::variant<Status, const RemoteError> _error; +}; + +} // namespace mongo diff --git a/src/mongo/executor/remote_command_runner_test.cpp b/src/mongo/executor/remote_command_runner_test.cpp index 281397ed22b..d4560933e3c 100644 --- a/src/mongo/executor/remote_command_runner_test.cpp +++ b/src/mongo/executor/remote_command_runner_test.cpp @@ -27,15 +27,15 @@ * it in the license file. */ +#include "mongo/bson/oid.h" +#include "mongo/db/repl/hello_gen.h" +#include "mongo/executor/network_test_env.h" #include "mongo/executor/remote_command_response.h" #include "mongo/executor/remote_command_retry_policy.h" #include "mongo/executor/remote_command_runner.h" +#include "mongo/executor/remote_command_runner_error_info.h" #include "mongo/executor/remote_command_runner_test_fixture.h" #include "mongo/executor/remote_command_targeter.h" - -#include "mongo/bson/oid.h" -#include "mongo/db/repl/hello_gen.h" -#include "mongo/executor/network_test_env.h" #include "mongo/executor/task_executor.h" #include "mongo/executor/task_executor_test_fixture.h" #include "mongo/executor/thread_pool_task_executor.h" @@ -97,7 +97,17 @@ TEST_F(RemoteCommandRunnerTestFixture, LocalError) { return Status(ErrorCodes::NetworkTimeout, "mock"); }); - ASSERT_THROWS_CODE(resultFuture.get(), DBException, ErrorCodes::NetworkTimeout); + auto error = resultFuture.getNoThrow().getStatus(); + + // The error returned by our API should always be RemoteCommandExecutionError + ASSERT_EQ(error.code(), ErrorCodes::RemoteCommandExecutionError); + // Make sure we can extract the extra error info + auto extraInfo = error.extraInfo<RemoteCommandExecutionErrorInfo>(); + ASSERT(extraInfo); + // Make sure the extra info indicates the error was local, and that the + // local error (which is just a Status) has the correct code. + ASSERT(extraInfo->isLocal()); + ASSERT_EQ(extraInfo->asLocal().code(), ErrorCodes::NetworkTimeout); } /* @@ -119,7 +129,19 @@ TEST_F(RemoteCommandRunnerTestFixture, RemoteError) { return createErrorResponse(Status(ErrorCodes::BadValue, "mock")); }); - ASSERT_THROWS_CODE(resultFuture.get(), DBException, ErrorCodes::BadValue); + auto error = resultFuture.getNoThrow().getStatus(); + ASSERT_EQ(error.code(), ErrorCodes::RemoteCommandExecutionError); + + auto extraInfo = error.extraInfo<RemoteCommandExecutionErrorInfo>(); + ASSERT(extraInfo); + + ASSERT(extraInfo->isRemote()); + auto remoteError = extraInfo->asRemote(); + ASSERT_EQ(remoteError.getRemoteCommandResult(), Status(ErrorCodes::BadValue, "mock")); + + // No write concern or write errors expected + ASSERT_EQ(remoteError.getRemoteCommandWriteConcernError(), Status::OK()); + ASSERT_EQ(remoteError.getRemoteCommandFirstWriteError(), Status::OK()); } /* @@ -146,7 +168,20 @@ TEST_F(RemoteCommandRunnerTestFixture, WriteConcernError) { return resWithWriteConcernError; }); - ASSERT_THROWS_CODE(resultFuture.get(), DBException, ErrorCodes::WriteConcernFailed); + auto error = resultFuture.getNoThrow().getStatus(); + ASSERT_EQ(error.code(), ErrorCodes::RemoteCommandExecutionError); + + auto extraInfo = error.extraInfo<RemoteCommandExecutionErrorInfo>(); + ASSERT(extraInfo); + + ASSERT(extraInfo->isRemote()); + auto remoteError = extraInfo->asRemote(); + ASSERT_EQ(remoteError.getRemoteCommandWriteConcernError(), + Status(ErrorCodes::WriteConcernFailed, "mock")); + + // No top-level command or write errors expected + ASSERT_EQ(remoteError.getRemoteCommandFirstWriteError(), Status::OK()); + ASSERT_EQ(remoteError.getRemoteCommandResult(), Status::OK()); } /* @@ -173,8 +208,22 @@ TEST_F(RemoteCommandRunnerTestFixture, WriteError) { return resWithWriteError; }); - - ASSERT_THROWS_CODE(resultFuture.get(), DBException, ErrorCodes::DocumentValidationFailure); + auto error = resultFuture.getNoThrow().getStatus(); + ASSERT_EQ(error.code(), ErrorCodes::RemoteCommandExecutionError); + + auto extraInfo = error.extraInfo<RemoteCommandExecutionErrorInfo>(); + ASSERT(extraInfo); + + ASSERT(extraInfo->isRemote()); + auto remoteError = extraInfo->asRemote(); + ASSERT_EQ(remoteError.getRemoteCommandFirstWriteError(), + Status(ErrorCodes::DocumentValidationFailure, + "Document failed validation", + BSON("errInfo" << writeErrorExtraInfo))); + + // No top-level command or write errors expected + ASSERT_EQ(remoteError.getRemoteCommandWriteConcernError(), Status::OK()); + ASSERT_EQ(remoteError.getRemoteCommandResult(), Status::OK()); } /* |