From 088cc12eb9277e4e49797cceb2e901335a6cb79f Mon Sep 17 00:00:00 2001 From: Kaloian Manassiev Date: Thu, 24 Mar 2022 16:33:04 +0100 Subject: Revert "SERVER-64520 Get rid of WriteErrorDetail" This reverts commit 3c6e77a4a23df74b746653c3cd1ef9da67e7f9fa. --- .../catalog/sharding_catalog_write_retry_test.cpp | 8 +- .../s/commands/cluster_find_and_modify_cmd.cpp | 4 +- src/mongo/s/commands/cluster_write_cmd.cpp | 51 ++-- src/mongo/s/sessions_collection_sharded_test.cpp | 16 +- src/mongo/s/write_ops/SConscript | 1 + src/mongo/s/write_ops/batch_write_exec.cpp | 57 ++--- src/mongo/s/write_ops/batch_write_exec_test.cpp | 258 ++++++++++++++------- src/mongo/s/write_ops/batch_write_op.cpp | 96 ++++---- src/mongo/s/write_ops/batch_write_op.h | 14 +- src/mongo/s/write_ops/batch_write_op_test.cpp | 104 +++++---- src/mongo/s/write_ops/batched_command_response.cpp | 130 ++++++----- src/mongo/s/write_ops/batched_command_response.h | 30 +-- .../s/write_ops/batched_command_response_test.cpp | 185 +++++++-------- src/mongo/s/write_ops/write_error_detail.cpp | 187 +++++++++++++++ src/mongo/s/write_ops/write_error_detail.h | 107 +++++++++ src/mongo/s/write_ops/write_op.cpp | 119 +++++----- src/mongo/s/write_ops/write_op.h | 13 +- src/mongo/s/write_ops/write_op_test.cpp | 39 +++- 18 files changed, 928 insertions(+), 491 deletions(-) create mode 100644 src/mongo/s/write_ops/write_error_detail.cpp create mode 100644 src/mongo/s/write_ops/write_error_detail.h (limited to 'src/mongo/s') diff --git a/src/mongo/s/catalog/sharding_catalog_write_retry_test.cpp b/src/mongo/s/catalog/sharding_catalog_write_retry_test.cpp index 1c3373993f6..fab2bba0f62 100644 --- a/src/mongo/s/catalog/sharding_catalog_write_retry_test.cpp +++ b/src/mongo/s/catalog/sharding_catalog_write_retry_test.cpp @@ -482,8 +482,12 @@ TEST_F(UpdateRetryTest, OperationInterruptedDueToPrimaryStepDown) { BatchedCommandResponse response; response.setStatus(Status::OK()); - response.addToErrDetails(write_ops::WriteError( - 0, {ErrorCodes::InterruptedDueToReplStateChange, "Operation interrupted"})); + + auto writeErrDetail = std::make_unique(); + writeErrDetail->setIndex(0); + writeErrDetail->setStatus( + {ErrorCodes::InterruptedDueToReplStateChange, "Operation interrupted"}); + response.addToErrDetails(writeErrDetail.release()); return response.toBSON(); }); diff --git a/src/mongo/s/commands/cluster_find_and_modify_cmd.cpp b/src/mongo/s/commands/cluster_find_and_modify_cmd.cpp index 5931b4b1d73..619f34f3ca9 100644 --- a/src/mongo/s/commands/cluster_find_and_modify_cmd.cpp +++ b/src/mongo/s/commands/cluster_find_and_modify_cmd.cpp @@ -185,7 +185,7 @@ void handleWouldChangeOwningShardErrorRetryableWrite( (bodyStatus == ErrorCodes::DuplicateKey && !bodyStatus.extraInfo()->getKeyPattern().hasField("_id"))) { bodyStatus.addContext(documentShardKeyUpdateUtil::kNonDuplicateKeyErrorContext); - } + }; uassertStatusOK(bodyStatus); uassertStatusOK(swCommitResult.getValue().cmdStatus); @@ -622,7 +622,7 @@ private: (e.code() == ErrorCodes::DuplicateKey && !e.extraInfo()->getKeyPattern().hasField("_id"))) { e.addContext(documentShardKeyUpdateUtil::kNonDuplicateKeyErrorContext); - } + }; auto txnRouterForAbort = TransactionRouter::get(opCtx); if (txnRouterForAbort) diff --git a/src/mongo/s/commands/cluster_write_cmd.cpp b/src/mongo/s/commands/cluster_write_cmd.cpp index ef94ff445fd..286002b107c 100644 --- a/src/mongo/s/commands/cluster_write_cmd.cpp +++ b/src/mongo/s/commands/cluster_write_cmd.cpp @@ -68,27 +68,28 @@ void batchErrorToNotPrimaryErrorTracker(const BatchedCommandRequest& request, NotPrimaryErrorTracker* tracker) { tracker->reset(); - boost::optional commandStatus; - Status const* lastBatchStatus = nullptr; + std::unique_ptr commandError; + WriteErrorDetail* lastBatchError = nullptr; if (!response.getOk()) { // Command-level error, all writes failed - commandStatus = response.getTopLevelStatus(); - lastBatchStatus = commandStatus.get_ptr(); + commandError = std::make_unique(); + commandError->setStatus(response.getTopLevelStatus()); + lastBatchError = commandError.get(); } else if (response.isErrDetailsSet()) { // The last error in the batch is always reported - this matches expected COE semantics for // insert batches. For updates and deletes, error is only reported if the error was on the // last item. - const bool lastOpErrored = response.getErrDetails().back().getIndex() == + const bool lastOpErrored = response.getErrDetails().back()->getIndex() == static_cast(request.sizeWriteOps() - 1); if (request.getBatchType() == BatchedCommandRequest::BatchType_Insert || lastOpErrored) { - lastBatchStatus = &response.getErrDetails().back().getStatus(); + lastBatchError = response.getErrDetails().back(); } } // Record an error if one exists - if (lastBatchStatus) { - tracker->recordError(lastBatchStatus->code()); + if (lastBatchError) { + tracker->recordError(lastBatchError->toStatus().code()); } } @@ -113,7 +114,7 @@ boost::optional getWouldChangeOwningShardErrorInfo( if (request.sizeWriteOps() != 1U) { for (auto it = response->getErrDetails().begin(); it != response->getErrDetails().end(); ++it) { - if (it->getStatus() != ErrorCodes::WouldChangeOwningShard) { + if ((*it)->toStatus() != ErrorCodes::WouldChangeOwningShard) { continue; } @@ -122,20 +123,20 @@ boost::optional getWouldChangeOwningShardErrorInfo( "Document shard key value updates that cause the doc to move shards " "must be sent with write batch of size 1"); - it->setStatus({ErrorCodes::InvalidOptions, - "Document shard key value updates that cause the doc to move shards " - "must be sent with write batch of size 1"}); + (*it)->setStatus({ErrorCodes::InvalidOptions, + "Document shard key value updates that cause the doc to move shards " + "must be sent with write batch of size 1"}); } return boost::none; } else { for (const auto& err : response->getErrDetails()) { - if (err.getStatus() != ErrorCodes::WouldChangeOwningShard) { + if (err->toStatus() != ErrorCodes::WouldChangeOwningShard) { continue; } BSONObjBuilder extraInfoBuilder; - err.getStatus().extraInfo()->serialize(&extraInfoBuilder); + err->toStatus().extraInfo()->serialize(&extraInfoBuilder); auto extraInfo = extraInfoBuilder.obj(); return WouldChangeOwningShardInfo::parseFromCommandError(extraInfo); } @@ -195,9 +196,12 @@ void handleWouldChangeOwningShardErrorRetryableWrite(OperationContext* opCtx, (bodyStatus == ErrorCodes::DuplicateKey && !bodyStatus.extraInfo()->getKeyPattern().hasField("_id"))) { bodyStatus.addContext(documentShardKeyUpdateUtil::kNonDuplicateKeyErrorContext); - } + }; - response->addToErrDetails({0, bodyStatus}); + auto error = std::make_unique(); + error->setIndex(0); + error->setStatus(bodyStatus); + response->addToErrDetails(error.release()); return; } @@ -363,16 +367,16 @@ bool handleWouldChangeOwningShardError(OperationContext* opCtx, e.addContext(documentShardKeyUpdateUtil::kNonDuplicateKeyErrorContext); } - if (!response->isErrDetailsSet()) { - response->addToErrDetails( - {0, - Status(ErrorCodes::InternalError, "Will be replaced by the code below")}); + if (!response->isErrDetailsSet() || !response->getErrDetails().back()) { + auto error = std::make_unique(); + error->setIndex(0); + response->addToErrDetails(error.release()); } // Set the error status to the status of the failed command and abort the - // transaction + // transaction. auto status = e.toStatus(); - response->getErrDetails().back().setStatus(status); + response->getErrDetails().back()->setStatus(status); auto txnRouterForAbort = TransactionRouter::get(opCtx); if (txnRouterForAbort) @@ -442,7 +446,6 @@ void updateHostsTargetedMetrics(OperationContext* opCtx, opCtx, nShardsTargeted, nShardsOwningChunks); NumHostsTargetedMetrics::get(opCtx).addNumHostsTargeted(writeType, targetType); } - } // namespace void ClusterWriteCmd::_commandOpWrite(OperationContext* opCtx, @@ -549,7 +552,7 @@ bool ClusterWriteCmd::InvocationBase::runImpl(OperationContext* opCtx, } else if (batchedRequest.getWriteCommandRequestBase().getOrdered() && response.isErrDetailsSet()) { // Add one failed attempt - numAttempts = response.getErrDetailsAt(0).getIndex() + 1; + numAttempts = response.getErrDetailsAt(0)->getIndex() + 1; } else { numAttempts = batchedRequest.sizeWriteOps(); } diff --git a/src/mongo/s/sessions_collection_sharded_test.cpp b/src/mongo/s/sessions_collection_sharded_test.cpp index d82a4edfbfa..a5976752058 100644 --- a/src/mongo/s/sessions_collection_sharded_test.cpp +++ b/src/mongo/s/sessions_collection_sharded_test.cpp @@ -134,8 +134,12 @@ TEST_F(SessionsCollectionShardedTest, RefreshOneSessionWriteErrTest) { BatchedCommandResponse response; response.setStatus(Status::OK()); response.setNModified(0); - response.addToErrDetails( - write_ops::WriteError(0, {ErrorCodes::NotWritablePrimary, "not primary"})); + response.addToErrDetails([&] { + WriteErrorDetail* errDetail = new WriteErrorDetail(); + errDetail->setIndex(0); + errDetail->setStatus({ErrorCodes::NotWritablePrimary, "not primary"}); + return errDetail; + }()); return response.toBSON(); }); @@ -185,8 +189,12 @@ TEST_F(SessionsCollectionShardedTest, RemoveOneSessionWriteErrTest) { BatchedCommandResponse response; response.setStatus(Status::OK()); response.setNModified(0); - response.addToErrDetails( - write_ops::WriteError(0, {ErrorCodes::NotWritablePrimary, "not primary"})); + response.addToErrDetails([&] { + WriteErrorDetail* errDetail = new WriteErrorDetail(); + errDetail->setIndex(0); + errDetail->setStatus({ErrorCodes::NotWritablePrimary, "not primary"}); + return errDetail; + }()); return response.toBSON(); }); diff --git a/src/mongo/s/write_ops/SConscript b/src/mongo/s/write_ops/SConscript index b591f592405..5f493f3a06a 100644 --- a/src/mongo/s/write_ops/SConscript +++ b/src/mongo/s/write_ops/SConscript @@ -10,6 +10,7 @@ env.Library( 'batched_command_request.cpp', 'batched_command_response.cpp', 'batched_upsert_detail.cpp', + 'write_error_detail.cpp', ], LIBDEPS=[ '$BUILD_DIR/mongo/base', diff --git a/src/mongo/s/write_ops/batch_write_exec.cpp b/src/mongo/s/write_ops/batch_write_exec.cpp index 99ed846f7f2..d5c09b91f04 100644 --- a/src/mongo/s/write_ops/batch_write_exec.cpp +++ b/src/mongo/s/write_ops/batch_write_exec.cpp @@ -54,6 +54,17 @@ namespace { const ReadPreferenceSetting kPrimaryOnlyReadPreference(ReadPreference::PrimaryOnly); +// +// Map which allows associating ConnectionString hosts with TargetedWriteBatches +// This is needed since the dispatcher only returns hosts with responses. +// + +WriteErrorDetail errorFromStatus(const Status& status) { + WriteErrorDetail error; + error.setStatus(status); + return error; +} + // Helper to note several stale shard errors from a response void noteStaleShardResponses(OperationContext* opCtx, const std::vector& staleErrors, @@ -64,11 +75,12 @@ void noteStaleShardResponses(OperationContext* opCtx, "Noting stale config response from {shardId}: {errorInfo}", "Noting stale config response", "shardId"_attr = error.endpoint.shardName, - "status"_attr = error.error.getStatus()); - - auto extraInfo = error.error.getStatus().extraInfo(); - invariant(extraInfo); - targeter->noteStaleShardResponse(opCtx, error.endpoint, *extraInfo); + "errorInfo"_attr = error.error.getErrInfo()); + targeter->noteStaleShardResponse( + opCtx, + error.endpoint, + StaleConfigInfo::parseFromCommandError( + error.error.isErrInfoSet() ? error.error.getErrInfo() : BSONObj())); } } @@ -81,10 +93,11 @@ void noteStaleDbResponses(OperationContext* opCtx, 4, "Noting stale database response", "shardId"_attr = error.endpoint.shardName, - "status"_attr = error.error.getStatus()); - auto extraInfo = error.error.getStatus().extraInfo(); - invariant(extraInfo); - targeter->noteStaleDbResponse(opCtx, error.endpoint, *extraInfo); + "errorInfo"_attr = error.error); + targeter->noteStaleDbResponse( + opCtx, + error.endpoint, + StaleDbRoutingVersion::parseFromCommandError(error.error.toBSON())); } } @@ -279,7 +292,6 @@ void BatchWriteExec::executeBatch(OperationContext* opCtx, if (responseStatus.isOK()) { TrackedErrors trackedErrors; - trackedErrors.startTracking(ErrorCodes::StaleConfig); trackedErrors.startTracking(ErrorCodes::StaleShardVersion); trackedErrors.startTracking(ErrorCodes::StaleDbVersion); trackedErrors.startTracking(ErrorCodes::TenantMigrationAborted); @@ -318,26 +330,20 @@ void BatchWriteExec::executeBatch(OperationContext* opCtx, } // Note if anything was stale - auto staleConfigErrors = trackedErrors.getErrors(ErrorCodes::StaleConfig); - { - const auto& staleShardVersionErrors = - trackedErrors.getErrors(ErrorCodes::StaleShardVersion); - staleConfigErrors.insert(staleConfigErrors.begin(), - staleShardVersionErrors.begin(), - staleShardVersionErrors.end()); - } + const auto& staleShardErrors = + trackedErrors.getErrors(ErrorCodes::StaleShardVersion); const auto& staleDbErrors = trackedErrors.getErrors(ErrorCodes::StaleDbVersion); const auto& tenantMigrationAbortedErrors = trackedErrors.getErrors(ErrorCodes::TenantMigrationAborted); - if (!staleConfigErrors.empty()) { + if (!staleShardErrors.empty()) { invariant(staleDbErrors.empty()); - noteStaleShardResponses(opCtx, staleConfigErrors, &targeter); + noteStaleShardResponses(opCtx, staleShardErrors, &targeter); ++stats->numStaleShardBatches; } if (!staleDbErrors.empty()) { - invariant(staleConfigErrors.empty()); + invariant(staleShardErrors.empty()); noteStaleDbResponses(opCtx, staleDbErrors, &targeter); ++stats->numStaleDbBatches; } @@ -375,7 +381,7 @@ void BatchWriteExec::executeBatch(OperationContext* opCtx, : "from failing to target a host in the shard ") << shardInfo); - batchOp.noteBatchError(*batch, write_ops::WriteError(0, status)); + batchOp.noteBatchError(*batch, errorFromStatus(status)); LOGV2_DEBUG(22908, 4, @@ -431,8 +437,8 @@ void BatchWriteExec::executeBatch(OperationContext* opCtx, {logv2::LogComponent::kShardMigrationPerf}, "Finished post-migration commit refresh on the router with error", "error"_attr = redact(ex)); - batchOp.abortBatch(write_ops::WriteError( - 0, ex.toStatus("collection was dropped in the middle of the operation"))); + batchOp.abortBatch(errorFromStatus( + ex.toStatus("collection was dropped in the middle of the operation"))); break; } catch (const DBException& ex) { LOGV2_DEBUG_OPTIONS(4817409, @@ -460,8 +466,7 @@ void BatchWriteExec::executeBatch(OperationContext* opCtx, numCompletedOps = currCompletedOps; if (numRoundsWithoutProgress > kMaxRoundsWithoutProgress) { - batchOp.abortBatch(write_ops::WriteError( - 0, + batchOp.abortBatch(errorFromStatus( {ErrorCodes::NoProgressMade, str::stream() << "no progress was made executing batch write op in " << clientRequest.getNS().ns() << " after " diff --git a/src/mongo/s/write_ops/batch_write_exec_test.cpp b/src/mongo/s/write_ops/batch_write_exec_test.cpp index 3b4079b4c84..0bbb85f6ff7 100644 --- a/src/mongo/s/write_ops/batch_write_exec_test.cpp +++ b/src/mongo/s/write_ops/batch_write_exec_test.cpp @@ -90,13 +90,20 @@ BSONObj expectInsertsReturnStaleVersionErrorsBase(const NamespaceString& nss, // Report a stale version error for each write in the batch. int i = 0; for (itInserted = inserted.begin(); itInserted != inserted.end(); ++itInserted) { - staleResponse.addToErrDetails( - write_ops::WriteError(i, - Status(StaleConfigInfo(nss, - ChunkVersion(1, 0, epoch, timestamp), - ChunkVersion(2, 0, epoch, timestamp), - ShardId(kShardName1)), - "Stale error"))); + WriteErrorDetail* error = new WriteErrorDetail; + error->setStatus({ErrorCodes::StaleShardVersion, ""}); + error->setErrInfo([&] { + StaleConfigInfo sci(nss, + ChunkVersion(1, 0, epoch, timestamp), + ChunkVersion(2, 0, epoch, timestamp), + ShardId(kShardName1)); + BSONObjBuilder builder; + sci.serialize(&builder); + return builder.obj(); + }()); + error->setIndex(i); + + staleResponse.addToErrDetails(error); ++i; } @@ -609,8 +616,8 @@ TEST_F(BatchWriteExecTest, SingleOpUnorderedError) { ASSERT(response.getOk()); ASSERT_EQ(0, response.getN()); ASSERT(response.isErrDetailsSet()); - ASSERT_EQ(errResponse.toStatus().code(), response.getErrDetailsAt(0).getStatus().code()); - ASSERT(response.getErrDetailsAt(0).getStatus().reason().find( + ASSERT_EQ(errResponse.toStatus().code(), response.getErrDetailsAt(0)->toStatus().code()); + ASSERT(response.getErrDetailsAt(0)->toStatus().reason().find( errResponse.toStatus().reason()) != std::string::npos); ASSERT_EQ(1, stats.numRounds); @@ -725,13 +732,23 @@ TEST_F(BatchWriteExecTest, StaleShardVersionReturnedFromBatchWithSingleMultiWrit BatchedCommandResponse response; response.setStatus(Status::OK()); response.setNModified(0); - response.addToErrDetails( - write_ops::WriteError(0, - Status(StaleConfigInfo(nss, - ChunkVersion(101, 200, epoch, timestamp), - ChunkVersion(105, 200, epoch, timestamp), - ShardId(kShardName2)), - "Stale error"))); + response.addToErrDetails([&] { + WriteErrorDetail* errDetail = new WriteErrorDetail(); + errDetail->setIndex(0); + errDetail->setStatus({ErrorCodes::StaleShardVersion, "Stale shard version"}); + errDetail->setErrInfo([&] { + Status ssvStatus(StaleConfigInfo(nss, + ChunkVersion(101, 200, epoch, timestamp), + ChunkVersion(105, 200, epoch, timestamp), + ShardId(kShardName2)), + "Stale shard version"); + BSONObjBuilder builder; + ssvStatus.serializeErrorToBSON(&builder); + return builder.obj(); + }()); + return errDetail; + }()); + return response.toBSON(); }); @@ -823,20 +840,39 @@ TEST_F(BatchWriteExecTest, BatchedCommandResponse response; response.setStatus(Status::OK()); response.setNModified(0); - response.addToErrDetails( - write_ops::WriteError(0, - Status(StaleConfigInfo(nss, - ChunkVersion(101, 200, epoch, timestamp), - ChunkVersion(105, 200, epoch, timestamp), - ShardId(kShardName2)), - "Stale error"))); - response.addToErrDetails( - write_ops::WriteError(1, - Status(StaleConfigInfo(nss, - ChunkVersion(101, 200, epoch, timestamp), - ChunkVersion(105, 200, epoch, timestamp), - ShardId(kShardName2)), - "Stale error"))); + response.addToErrDetails([&] { + WriteErrorDetail* errDetail = new WriteErrorDetail(); + errDetail->setIndex(0); + errDetail->setStatus({ErrorCodes::StaleShardVersion, "Stale shard version"}); + errDetail->setErrInfo([&] { + Status ssvStatus(StaleConfigInfo(nss, + ChunkVersion(101, 200, epoch, timestamp), + ChunkVersion(105, 200, epoch, timestamp), + ShardId(kShardName2)), + "Stale shard version"); + BSONObjBuilder builder; + ssvStatus.serializeErrorToBSON(&builder); + return builder.obj(); + }()); + return errDetail; + }()); + response.addToErrDetails([&] { + WriteErrorDetail* errDetail = new WriteErrorDetail(); + errDetail->setIndex(1); + errDetail->setStatus({ErrorCodes::StaleShardVersion, "Stale shard version"}); + errDetail->setErrInfo([&] { + Status ssvStatus(StaleConfigInfo(nss, + ChunkVersion(101, 200, epoch, timestamp), + ChunkVersion(105, 200, epoch, timestamp), + ShardId(kShardName2)), + "Stale shard version"); + BSONObjBuilder builder; + ssvStatus.serializeErrorToBSON(&builder); + return builder.obj(); + }()); + return errDetail; + }()); + return response.toBSON(); }); @@ -917,13 +953,23 @@ TEST_F(BatchWriteExecTest, RetryableErrorReturnedFromMultiWriteWithShard1Firs) { BatchedCommandResponse response; response.setStatus(Status::OK()); response.setNModified(0); - response.addToErrDetails( - write_ops::WriteError(1, - Status(StaleConfigInfo(nss, - ChunkVersion(101, 200, epoch, timestamp), - ChunkVersion(105, 200, epoch, timestamp), - ShardId(kShardName2)), - "Stale error"))); + response.addToErrDetails([&] { + WriteErrorDetail* errDetail = new WriteErrorDetail(); + errDetail->setIndex(1); + errDetail->setStatus({ErrorCodes::StaleShardVersion, "Stale shard version"}); + errDetail->setErrInfo([&] { + Status ssvStatus(StaleConfigInfo(nss, + ChunkVersion(101, 200, epoch, timestamp), + ChunkVersion(105, 200, epoch, timestamp), + ShardId(kShardName1)), + "Stale shard version"); + BSONObjBuilder builder; + ssvStatus.serializeErrorToBSON(&builder); + return builder.obj(); + }()); + return errDetail; + }()); + return response.toBSON(); }); @@ -933,13 +979,23 @@ TEST_F(BatchWriteExecTest, RetryableErrorReturnedFromMultiWriteWithShard1Firs) { BatchedCommandResponse response; response.setStatus(Status::OK()); response.setNModified(0); - response.addToErrDetails( - write_ops::WriteError(0, - Status(StaleConfigInfo(nss, - ChunkVersion(101, 200, epoch, timestamp), - ChunkVersion(105, 200, epoch, timestamp), - ShardId(kShardName2)), - "Stale error"))); + response.addToErrDetails([&] { + WriteErrorDetail* errDetail = new WriteErrorDetail(); + errDetail->setIndex(0); + errDetail->setStatus({ErrorCodes::StaleShardVersion, "Stale shard version"}); + errDetail->setErrInfo([&] { + Status ssvStatus(StaleConfigInfo(nss, + ChunkVersion(101, 200, epoch, timestamp), + ChunkVersion(105, 200, epoch, timestamp), + ShardId(kShardName2)), + "Stale shard version"); + BSONObjBuilder builder; + ssvStatus.serializeErrorToBSON(&builder); + return builder.obj(); + }()); + return errDetail; + }()); + return response.toBSON(); }); @@ -1031,13 +1087,23 @@ TEST_F(BatchWriteExecTest, RetryableErrorReturnedFromMultiWriteWithShard1FirstOK BatchedCommandResponse response; response.setStatus(Status::OK()); response.setNModified(0); - response.addToErrDetails( - write_ops::WriteError(1, - Status(StaleConfigInfo(nss, - ChunkVersion(101, 200, epoch, timestamp), - ChunkVersion(105, 200, epoch, timestamp), - ShardId(kShardName2)), - "Stale error"))); + response.addToErrDetails([&] { + WriteErrorDetail* errDetail = new WriteErrorDetail(); + errDetail->setIndex(1); + errDetail->setStatus({ErrorCodes::StaleShardVersion, "Stale shard version"}); + errDetail->setErrInfo([&] { + Status ssvStatus(StaleConfigInfo(nss, + ChunkVersion(101, 200, epoch, timestamp), + ChunkVersion(105, 200, epoch, timestamp), + ShardId(kShardName1)), + "Stale shard version"); + BSONObjBuilder builder; + ssvStatus.serializeErrorToBSON(&builder); + return builder.obj(); + }()); + return errDetail; + }()); + return response.toBSON(); }); @@ -1047,13 +1113,23 @@ TEST_F(BatchWriteExecTest, RetryableErrorReturnedFromMultiWriteWithShard1FirstOK BatchedCommandResponse response; response.setStatus(Status::OK()); response.setNModified(0); - response.addToErrDetails( - write_ops::WriteError(1, - Status(StaleConfigInfo(nss, - ChunkVersion(101, 200, epoch, timestamp), - ChunkVersion(105, 200, epoch, timestamp), - ShardId(kShardName2)), - "Stale error"))); + response.addToErrDetails([&] { + WriteErrorDetail* errDetail = new WriteErrorDetail(); + errDetail->setIndex(1); + errDetail->setStatus({ErrorCodes::StaleShardVersion, "Stale shard version"}); + errDetail->setErrInfo([&] { + Status ssvStatus(StaleConfigInfo(nss, + ChunkVersion(101, 200, epoch, timestamp), + ChunkVersion(105, 200, epoch, timestamp), + ShardId(kShardName2)), + "Stale shard version"); + BSONObjBuilder builder; + ssvStatus.serializeErrorToBSON(&builder); + return builder.obj(); + }()); + return errDetail; + }()); + return response.toBSON(); }); @@ -1150,13 +1226,22 @@ TEST_F(BatchWriteExecTest, RetryableErrorReturnedFromWriteWithShard1SSVShard2OK) response.setStatus(Status::OK()); response.setNModified(0); response.setN(0); - response.addToErrDetails( - write_ops::WriteError(0, - Status(StaleConfigInfo(nss, - ChunkVersion(101, 200, epoch, timestamp), - ChunkVersion(105, 200, epoch, timestamp), - ShardId(kShardName2)), - "Stale error"))); + response.addToErrDetails([&] { + WriteErrorDetail* errDetail = new WriteErrorDetail(); + errDetail->setIndex(0); + errDetail->setStatus({ErrorCodes::StaleShardVersion, "Stale shard version"}); + errDetail->setErrInfo([&] { + Status ssvStatus(StaleConfigInfo(nss, + ChunkVersion(101, 200, epoch, timestamp), + ChunkVersion(105, 200, epoch, timestamp), + ShardId(kShardName1)), + "Migration happened"); + BSONObjBuilder builder; + ssvStatus.serializeErrorToBSON(&builder); + return builder.obj(); + }()); + return errDetail; + }()); // This simulates a migration of the last chunk on shard 1 to shard 2, which means that // future rounds on the batchExecutor should only target shard 2 @@ -1276,8 +1361,8 @@ TEST_F(BatchWriteExecTest, TooManyStaleShardOp) { ASSERT(response.getOk()); ASSERT_EQ(0, response.getN()); ASSERT(response.isErrDetailsSet()); - ASSERT_EQUALS(response.getErrDetailsAt(0).getStatus().code(), ErrorCodes::NoProgressMade); - ASSERT_EQUALS(response.getErrDetailsAt(1).getStatus().code(), ErrorCodes::NoProgressMade); + ASSERT_EQUALS(response.getErrDetailsAt(0)->toStatus().code(), ErrorCodes::NoProgressMade); + ASSERT_EQUALS(response.getErrDetailsAt(1)->toStatus().code(), ErrorCodes::NoProgressMade); ASSERT_EQUALS(stats.numStaleShardBatches, (1 + kMaxRoundsWithoutProgress)); }); @@ -1381,8 +1466,8 @@ TEST_F(BatchWriteExecTest, TooManyStaleDbOp) { ASSERT(response.getOk()); ASSERT_EQ(0, response.getN()); ASSERT(response.isErrDetailsSet()); - ASSERT_EQUALS(response.getErrDetailsAt(0).getStatus().code(), ErrorCodes::NoProgressMade); - ASSERT_EQUALS(response.getErrDetailsAt(1).getStatus().code(), ErrorCodes::NoProgressMade); + ASSERT_EQUALS(response.getErrDetailsAt(0)->toStatus().code(), ErrorCodes::NoProgressMade); + ASSERT_EQUALS(response.getErrDetailsAt(1)->toStatus().code(), ErrorCodes::NoProgressMade); ASSERT_EQUALS(stats.numStaleDbBatches, (1 + kMaxRoundsWithoutProgress)); }); @@ -1466,9 +1551,9 @@ TEST_F(BatchWriteExecTest, RetryableErrorNoTxnNumber) { ASSERT(response.getOk()); ASSERT_EQ(0, response.getN()); ASSERT(response.isErrDetailsSet()); - ASSERT_EQUALS(response.getErrDetailsAt(0).getStatus().code(), + ASSERT_EQUALS(response.getErrDetailsAt(0)->toStatus().code(), retryableErrResponse.toStatus().code()); - ASSERT(response.getErrDetailsAt(0).getStatus().reason().find( + ASSERT(response.getErrDetailsAt(0)->toStatus().reason().find( retryableErrResponse.toStatus().reason()) != std::string::npos); ASSERT_EQ(1, stats.numRounds); }); @@ -1546,9 +1631,9 @@ TEST_F(BatchWriteExecTest, NonRetryableErrorTxnNumber) { ASSERT(response.getOk()); ASSERT_EQ(0, response.getN()); ASSERT(response.isErrDetailsSet()); - ASSERT_EQUALS(response.getErrDetailsAt(0).getStatus().code(), + ASSERT_EQUALS(response.getErrDetailsAt(0)->toStatus().code(), nonRetryableErrResponse.toStatus().code()); - ASSERT(response.getErrDetailsAt(0).getStatus().reason().find( + ASSERT(response.getErrDetailsAt(0)->toStatus().reason().find( nonRetryableErrResponse.toStatus().reason()) != std::string::npos); ASSERT_EQ(1, stats.numRounds); }); @@ -1587,9 +1672,9 @@ TEST_F(BatchWriteExecTest, StaleEpochIsNotRetryable) { ASSERT(response.getOk()); ASSERT_EQ(0, response.getN()); ASSERT(response.isErrDetailsSet()); - ASSERT_EQUALS(response.getErrDetailsAt(0).getStatus().code(), + ASSERT_EQUALS(response.getErrDetailsAt(0)->toStatus().code(), nonRetryableErrResponse.toStatus().code()); - ASSERT(response.getErrDetailsAt(0).getStatus().reason().find( + ASSERT(response.getErrDetailsAt(0)->toStatus().reason().find( nonRetryableErrResponse.toStatus().reason()) != std::string::npos); ASSERT_EQ(1, stats.numRounds); }); @@ -1895,7 +1980,7 @@ TEST_F(BatchWriteExecTargeterErrorTest, TargetedFailedAndErrorResponse) { BatchWriteExec::executeBatch( operationContext(), multiShardNSTargeter, request, &response, &stats); ASSERT(response.isErrDetailsSet()); - auto code = response.getErrDetailsAt(0).getStatus().code(); + auto code = response.getErrDetailsAt(0)->toStatus().code(); ASSERT_EQUALS(code, ErrorCodes::MultipleErrorsOccurred); }); @@ -1904,8 +1989,13 @@ TEST_F(BatchWriteExecTargeterErrorTest, TargetedFailedAndErrorResponse) { BatchedCommandResponse response; response.setStatus(Status::OK()); - response.addToErrDetails( - write_ops::WriteError(0, {ErrorCodes::UnknownError, "mock non-retryable error"})); + response.addToErrDetails([&] { + WriteErrorDetail* errDetail = new WriteErrorDetail(); + errDetail->setIndex(0); + errDetail->setStatus({ErrorCodes::UnknownError, "mock non-retryable error"}); + return errDetail; + }()); + return response.toBSON(); }); @@ -2031,7 +2121,7 @@ TEST_F(BatchWriteExecTransactionTargeterErrorTest, TargetedFailedAndErrorRespons BatchWriteExec::executeBatch( operationContext(), multiShardNSTargeter, request, &response, &stats); ASSERT(response.isErrDetailsSet()); - auto code = response.getErrDetailsAt(0).getStatus().code(); + auto code = response.getErrDetailsAt(0)->toStatus().code(); ASSERT_EQUALS(code, ErrorCodes::ShardNotFound); }); @@ -2175,7 +2265,7 @@ TEST_F(BatchWriteExecTransactionMultiShardTest, TargetedSucceededAndErrorRespons BatchWriteExec::executeBatch( operationContext(), multiShardNSTargeter, request, &response, &stats); ASSERT(response.isErrDetailsSet()); - auto code = response.getErrDetailsAt(0).getStatus().code(); + auto code = response.getErrDetailsAt(0)->toStatus().code(); ASSERT_EQUALS(code, ErrorCodes::UnknownError); }); @@ -2326,7 +2416,7 @@ TEST_F(BatchWriteExecTransactionTest, ErrorInBatchThrows_CommandError) { ASSERT(response.isErrDetailsSet()); ASSERT_GT(response.sizeErrDetails(), 0u); - ASSERT_EQ(ErrorCodes::UnknownError, response.getErrDetailsAt(0).getStatus().code()); + ASSERT_EQ(ErrorCodes::UnknownError, response.getErrDetailsAt(0)->toStatus().code()); }); BatchedCommandResponse failedResponse; @@ -2358,7 +2448,7 @@ TEST_F(BatchWriteExecTransactionTest, ErrorInBatchSets_WriteError) { ASSERT(response.isErrDetailsSet()); ASSERT_GT(response.sizeErrDetails(), 0u); - ASSERT_EQ(ErrorCodes::StaleConfig, response.getErrDetailsAt(0).getStatus().code()); + ASSERT_EQ(ErrorCodes::StaleShardVersion, response.getErrDetailsAt(0)->toStatus().code()); }); // Any write error works, using SSV for convenience. @@ -2388,7 +2478,7 @@ TEST_F(BatchWriteExecTransactionTest, ErrorInBatchSets_WriteErrorOrdered) { ASSERT(response.isErrDetailsSet()); ASSERT_GT(response.sizeErrDetails(), 0u); - ASSERT_EQ(ErrorCodes::StaleConfig, response.getErrDetailsAt(0).getStatus().code()); + ASSERT_EQ(ErrorCodes::StaleShardVersion, response.getErrDetailsAt(0)->toStatus().code()); }); // Any write error works, using SSV for convenience. @@ -2447,7 +2537,7 @@ TEST_F(BatchWriteExecTransactionTest, ErrorInBatchSets_DispatchError) { ASSERT(response.isErrDetailsSet()); ASSERT_GT(response.sizeErrDetails(), 0u); - ASSERT_EQ(ErrorCodes::CallbackCanceled, response.getErrDetailsAt(0).getStatus().code()); + ASSERT_EQ(ErrorCodes::CallbackCanceled, response.getErrDetailsAt(0)->toStatus().code()); }); onCommandForPoolExecutor([&](const executor::RemoteCommandRequest& request) { diff --git a/src/mongo/s/write_ops/batch_write_op.cpp b/src/mongo/s/write_ops/batch_write_op.cpp index e21d598f698..c31b56992d6 100644 --- a/src/mongo/s/write_ops/batch_write_op.cpp +++ b/src/mongo/s/write_ops/batch_write_op.cpp @@ -48,10 +48,9 @@ namespace mongo { namespace { -struct WriteErrorComp { - bool operator()(const write_ops::WriteError& errorA, - const write_ops::WriteError& errorB) const { - return errorA.getIndex() < errorB.getIndex(); +struct WriteErrorDetailComp { + bool operator()(const WriteErrorDetail* errorA, const WriteErrorDetail* errorB) const { + return errorA->getIndex() < errorB->getIndex(); } }; @@ -86,6 +85,10 @@ BSONObj upgradeWriteConcern(const BSONObj& origWriteConcern) { return newWriteConcern.obj(); } +void buildTargetError(const Status& errStatus, WriteErrorDetail* details) { + details->setStatus(errStatus); +} + /** * Helper to determine whether a number of targeted writes require a new targeted batch. */ @@ -242,11 +245,11 @@ int getWriteSizeBytes(const WriteOp& writeOp) { * into a TrackedErrorMap */ void trackErrors(const ShardEndpoint& endpoint, - const std::vector& itemErrors, + const std::vector itemErrors, TrackedErrors* trackedErrors) { - for (auto&& error : itemErrors) { - if (trackedErrors->isTracking(error.getStatus().code())) { - trackedErrors->addError(ShardError(endpoint, error)); + for (const auto error : itemErrors) { + if (trackedErrors->isTracking(error->toStatus().code())) { + trackedErrors->addError(ShardError(endpoint, *error)); } } } @@ -256,24 +259,29 @@ void trackErrors(const ShardEndpoint& endpoint, * populated already, contacting the primary shard if necessary. */ void populateCollectionUUIDMismatch(OperationContext* opCtx, - write_ops::WriteError* error, + WriteErrorDetail* error, boost::optional* actualCollection, bool* hasContactedPrimaryShard) { - if (error->getStatus() != ErrorCodes::CollectionUUIDMismatch) { + auto status = error->toStatus(); + if (status.code() != ErrorCodes::CollectionUUIDMismatch) { return; } + auto info = status.extraInfo(); - auto info = error->getStatus().extraInfo(); if (info->actualCollection()) { return; } - if (*actualCollection) { + auto setErrorStatus = [&] { error->setStatus({CollectionUUIDMismatchInfo{info->db(), info->collectionUUID(), info->expectedCollection(), **actualCollection}, - error->getStatus().reason()}); + status.reason()}); + }; + + if (*actualCollection) { + setErrorStatus(); return; } @@ -282,7 +290,7 @@ void populateCollectionUUIDMismatch(OperationContext* opCtx, } // The listCollections command cannot be run in multi-document transactions, so run it using an - // alternative client. + // alterative client. auto client = opCtx->getServiceContext()->makeClient("populateCollectionUUIDMismatch"); auto alternativeOpCtx = client->makeOperationContext(); opCtx = alternativeOpCtx.get(); @@ -320,16 +328,10 @@ void populateCollectionUUIDMismatch(OperationContext* opCtx, if (auto actualCollectionElem = dotted_path_support::extractElementAtPath( response.swResponse.getValue().data, "cursor.firstBatch.0.name")) { *actualCollection = actualCollectionElem.str(); - error->setStatus({CollectionUUIDMismatchInfo{info->db(), - info->collectionUUID(), - info->expectedCollection(), - **actualCollection}, - error->getStatus().reason()}); - return; + setErrorStatus(); } *hasContactedPrimaryShard = true; - return; } } // namespace @@ -412,7 +414,8 @@ Status BatchWriteOp::targetBatch( } if (!targetStatus.isOK()) { - write_ops::WriteError targetError(0, targetStatus); + WriteErrorDetail targetError; + buildTargetError(targetStatus, &targetError); if (TransactionRouter::get(_opCtx)) { writeOp.setOpError(targetError); @@ -667,7 +670,8 @@ void BatchWriteOp::noteBatchResponse(const TargetedWriteBatch& targetedBatch, const BatchedCommandResponse& response, TrackedErrors* trackedErrors) { if (!response.getOk()) { - write_ops::WriteError error(0, response.getTopLevelStatus()); + WriteErrorDetail error; + error.setStatus(response.getTopLevelStatus()); // Treat command errors exactly like other failures of the batch. // @@ -692,7 +696,7 @@ void BatchWriteOp::noteBatchResponse(const TargetedWriteBatch& targetedBatch, _wcErrors.emplace_back(targetedBatch.getEndpoint(), *response.getWriteConcernError()); } - std::vector itemErrors; + std::vector itemErrors; // Handle batch and per-item errors if (response.isErrDetailsSet()) { @@ -701,7 +705,7 @@ void BatchWriteOp::noteBatchResponse(const TargetedWriteBatch& targetedBatch, itemErrors.begin(), response.getErrDetails().begin(), response.getErrDetails().end()); // Sort per-item errors by index - std::sort(itemErrors.begin(), itemErrors.end(), WriteErrorComp()); + std::sort(itemErrors.begin(), itemErrors.end(), WriteErrorDetailComp()); } // @@ -713,24 +717,25 @@ void BatchWriteOp::noteBatchResponse(const TargetedWriteBatch& targetedBatch, const bool ordered = _clientRequest.getWriteCommandRequestBase().getOrdered(); - auto itemErrorIt = itemErrors.begin(); + std::vector::iterator itemErrorIt = itemErrors.begin(); int index = 0; - write_ops::WriteError* lastError = nullptr; + WriteErrorDetail* lastError = nullptr; for (auto&& write : targetedBatch.getWrites()) { WriteOp& writeOp = _writeOps[write->writeOpRef.first]; - invariant(writeOp.getWriteState() == WriteOpState_Pending); + + dassert(writeOp.getWriteState() == WriteOpState_Pending); // See if we have an error for the write - write_ops::WriteError* writeError = nullptr; + WriteErrorDetail* writeError = nullptr; - if (itemErrorIt != itemErrors.end() && itemErrorIt->getIndex() == index) { + if (itemErrorIt != itemErrors.end() && (*itemErrorIt)->getIndex() == index) { // We have an per-item error for this write op's index - writeError = &(*itemErrorIt); + writeError = *itemErrorIt; ++itemErrorIt; } // Finish the response (with error, if needed) - if (!writeError) { + if (nullptr == writeError) { if (!ordered || !lastError) { writeOp.noteWriteComplete(*write); } else { @@ -773,7 +778,7 @@ void BatchWriteOp::noteBatchResponse(const TargetedWriteBatch& targetedBatch, } void BatchWriteOp::noteBatchError(const TargetedWriteBatch& targetedBatch, - const write_ops::WriteError& error) { + const WriteErrorDetail& error) { // Treat errors to get a batch response as failures of the contained writes BatchedCommandResponse emulatedResponse; emulatedResponse.setStatus(Status::OK()); @@ -784,15 +789,16 @@ void BatchWriteOp::noteBatchError(const TargetedWriteBatch& targetedBatch, : targetedBatch.getWrites().size(); for (int i = 0; i < numErrors; i++) { - write_ops::WriteError errorClone = error; - errorClone.setIndex(i); - emulatedResponse.addToErrDetails(std::move(errorClone)); + auto errorClone(std::make_unique()); + error.cloneTo(errorClone.get()); + errorClone->setIndex(i); + emulatedResponse.addToErrDetails(errorClone.release()); } noteBatchResponse(targetedBatch, emulatedResponse, nullptr); } -void BatchWriteOp::abortBatch(const write_ops::WriteError& error) { +void BatchWriteOp::abortBatch(const WriteErrorDetail& error) { dassert(!isFinished()); dassert(numWriteOpsIn(WriteOpState_Pending) == 0); @@ -870,26 +876,26 @@ void BatchWriteOp::buildClientResponse(BatchedCommandResponse* batchResp) { for (std::vector::iterator it = errOps.begin(); it != errOps.end(); ++it) { WriteOp& writeOp = **it; - write_ops::WriteError error = writeOp.getOpError(); - auto status = error.getStatus(); + WriteErrorDetail* error = new WriteErrorDetail(); + writeOp.getOpError().cloneTo(error); + batchResp->addToErrDetails(error); // For CollectionUUIDMismatch error, check if there is a response from a shard that // aleady has the actualCollection information. If there is none, make an additional // call to the primary shard to fetch this info in case the collection is unsharded or // the targeted shard does not own any chunk of the collection with the requested uuid. + auto status = error->toStatus(); if (!collectionUUIDMismatchActualCollection && status.code() == ErrorCodes::CollectionUUIDMismatch) { collectionUUIDMismatchActualCollection = status.extraInfo()->actualCollection(); } - - batchResp->addToErrDetails(std::move(error)); } bool hasContactedPrimaryShard = false; - for (auto& error : batchResp->getErrDetails()) { + for (auto error : batchResp->getErrDetails()) { populateCollectionUUIDMismatch( - _opCtx, &error, &collectionUUIDMismatchActualCollection, &hasContactedPrimaryShard); + _opCtx, error, &collectionUUIDMismatchActualCollection, &hasContactedPrimaryShard); } } @@ -978,7 +984,7 @@ void BatchWriteOp::_incBatchStats(const BatchedCommandResponse& response) { } } -void BatchWriteOp::_cancelBatches(const write_ops::WriteError& why, +void BatchWriteOp::_cancelBatches(const WriteErrorDetail& why, TargetedBatchMap&& batchMapToCancel) { // Collect all the writeOps that are currently targeted for (TargetedBatchMap::iterator it = batchMapToCancel.begin(); it != batchMapToCancel.end();) { @@ -1043,7 +1049,7 @@ bool TrackedErrors::isTracking(int errCode) const { } void TrackedErrors::addError(ShardError error) { - TrackedErrorMap::iterator seenIt = _errorMap.find(error.error.getStatus().code()); + TrackedErrorMap::iterator seenIt = _errorMap.find(error.error.toStatus().code()); if (seenIt == _errorMap.end()) return; seenIt->second.emplace_back(std::move(error)); diff --git a/src/mongo/s/write_ops/batch_write_op.h b/src/mongo/s/write_ops/batch_write_op.h index 38e924cd113..6aa41f23977 100644 --- a/src/mongo/s/write_ops/batch_write_op.h +++ b/src/mongo/s/write_ops/batch_write_op.h @@ -59,11 +59,12 @@ const int kWriteCommandBSONArrayPerElementOverheadBytes = 7; * Certain types of errors are not stored in WriteOps or must be returned to a caller. */ struct ShardError { - ShardError(const ShardEndpoint& endpoint, const write_ops::WriteError& error) - : endpoint(endpoint), error(error) {} + ShardError(const ShardEndpoint& endpoint, const WriteErrorDetail& error) : endpoint(endpoint) { + error.cloneTo(&this->error); + } ShardEndpoint endpoint; - write_ops::WriteError error; + WriteErrorDetail error; }; /** @@ -167,8 +168,7 @@ public: * Stores an error that occurred trying to send/recv a TargetedWriteBatch for this * BatchWriteOp. */ - void noteBatchError(const TargetedWriteBatch& targetedBatch, - const write_ops::WriteError& error); + void noteBatchError(const TargetedWriteBatch& targetedBatch, const WriteErrorDetail& error); /** * Aborts any further writes in the batch with the provided error. There must be no pending @@ -176,7 +176,7 @@ public: * * Batch is finished immediately after aborting. */ - void abortBatch(const write_ops::WriteError& error); + void abortBatch(const WriteErrorDetail& error); /** * Disposes of all tracked targeted batches when an error is encountered during a transaction. @@ -212,7 +212,7 @@ private: /** * Helper function to cancel all the write ops of targeted batches in a map. */ - void _cancelBatches(const write_ops::WriteError& why, TargetedBatchMap&& batchMapToCancel); + void _cancelBatches(const WriteErrorDetail& why, TargetedBatchMap&& batchMapToCancel); OperationContext* const _opCtx; diff --git a/src/mongo/s/write_ops/batch_write_op_test.cpp b/src/mongo/s/write_ops/batch_write_op_test.cpp index 36dcb474b78..9ed96363de0 100644 --- a/src/mongo/s/write_ops/batch_write_op_test.cpp +++ b/src/mongo/s/write_ops/batch_write_op_test.cpp @@ -94,7 +94,11 @@ void buildErrResponse(int code, const std::string& message, BatchedCommandRespon } void addError(int code, const std::string& message, int index, BatchedCommandResponse* response) { - response->addToErrDetails(write_ops::WriteError(index, {ErrorCodes::Error(code), message})); + std::unique_ptr error(new WriteErrorDetail); + error->setStatus({ErrorCodes::Error(code), message}); + error->setIndex(index); + + response->addToErrDetails(error.release()); } void addWCError(BatchedCommandResponse* response) { @@ -183,8 +187,8 @@ TEST_F(BatchWriteOpTest, SingleError) { ASSERT(clientResponse.getOk()); ASSERT_EQUALS(clientResponse.sizeErrDetails(), 1u); - ASSERT_EQUALS(clientResponse.getErrDetailsAt(0).getStatus().code(), response.toStatus().code()); - ASSERT(clientResponse.getErrDetailsAt(0).getStatus().reason().find( + ASSERT_EQUALS(clientResponse.getErrDetailsAt(0)->toStatus().code(), response.toStatus().code()); + ASSERT(clientResponse.getErrDetailsAt(0)->toStatus().reason().find( response.toStatus().reason()) != std::string::npos); ASSERT_EQUALS(clientResponse.getN(), 0); } @@ -807,11 +811,11 @@ TEST_F(BatchWriteOpTest, MultiOpSingleShardErrorUnordered) { ASSERT_EQUALS(clientResponse.getN(), 1); ASSERT(clientResponse.isErrDetailsSet()); ASSERT_EQUALS(clientResponse.sizeErrDetails(), 1u); - ASSERT_EQUALS(clientResponse.getErrDetailsAt(0).getStatus().code(), - response.getErrDetailsAt(0).getStatus().code()); - ASSERT_EQUALS(clientResponse.getErrDetailsAt(0).getStatus().reason(), - response.getErrDetailsAt(0).getStatus().reason()); - ASSERT_EQUALS(clientResponse.getErrDetailsAt(0).getIndex(), 1); + ASSERT_EQUALS(clientResponse.getErrDetailsAt(0)->toStatus().code(), + response.getErrDetailsAt(0)->toStatus().code()); + ASSERT_EQUALS(clientResponse.getErrDetailsAt(0)->toStatus().reason(), + response.getErrDetailsAt(0)->toStatus().reason()); + ASSERT_EQUALS(clientResponse.getErrDetailsAt(0)->getIndex(), 1); } // Multi-op targeting test where two ops go to two separate shards and there's an error on each op @@ -859,16 +863,16 @@ TEST_F(BatchWriteOpTest, MultiOpTwoShardErrorsUnordered) { ASSERT_EQUALS(clientResponse.getN(), 0); ASSERT(clientResponse.isErrDetailsSet()); ASSERT_EQUALS(clientResponse.sizeErrDetails(), 2u); - ASSERT_EQUALS(clientResponse.getErrDetailsAt(0).getStatus().code(), - response.getErrDetailsAt(0).getStatus().code()); - ASSERT_EQUALS(clientResponse.getErrDetailsAt(0).getStatus().reason(), - response.getErrDetailsAt(0).getStatus().reason()); - ASSERT_EQUALS(clientResponse.getErrDetailsAt(0).getIndex(), 0); - ASSERT_EQUALS(clientResponse.getErrDetailsAt(1).getStatus().code(), - response.getErrDetailsAt(0).getStatus().code()); - ASSERT_EQUALS(clientResponse.getErrDetailsAt(1).getStatus().reason(), - response.getErrDetailsAt(0).getStatus().reason()); - ASSERT_EQUALS(clientResponse.getErrDetailsAt(1).getIndex(), 1); + ASSERT_EQUALS(clientResponse.getErrDetailsAt(0)->toStatus().code(), + response.getErrDetailsAt(0)->toStatus().code()); + ASSERT_EQUALS(clientResponse.getErrDetailsAt(0)->toStatus().reason(), + response.getErrDetailsAt(0)->toStatus().reason()); + ASSERT_EQUALS(clientResponse.getErrDetailsAt(0)->getIndex(), 0); + ASSERT_EQUALS(clientResponse.getErrDetailsAt(1)->toStatus().code(), + response.getErrDetailsAt(0)->toStatus().code()); + ASSERT_EQUALS(clientResponse.getErrDetailsAt(1)->toStatus().reason(), + response.getErrDetailsAt(0)->toStatus().reason()); + ASSERT_EQUALS(clientResponse.getErrDetailsAt(1)->getIndex(), 1); } // Multi-op targeting test where each op goes to both shards and there's an error on one op on one @@ -925,11 +929,11 @@ TEST_F(BatchWriteOpTest, MultiOpPartialSingleShardErrorUnordered) { ASSERT_EQUALS(clientResponse.getN(), 3); ASSERT(clientResponse.isErrDetailsSet()); ASSERT_EQUALS(clientResponse.sizeErrDetails(), 1u); - ASSERT_EQUALS(clientResponse.getErrDetailsAt(0).getStatus().code(), - response.getErrDetailsAt(0).getStatus().code()); - ASSERT_EQUALS(clientResponse.getErrDetailsAt(0).getStatus().reason(), - response.getErrDetailsAt(0).getStatus().reason()); - ASSERT_EQUALS(clientResponse.getErrDetailsAt(0).getIndex(), 1); + ASSERT_EQUALS(clientResponse.getErrDetailsAt(0)->toStatus().code(), + response.getErrDetailsAt(0)->toStatus().code()); + ASSERT_EQUALS(clientResponse.getErrDetailsAt(0)->toStatus().reason(), + response.getErrDetailsAt(0)->toStatus().reason()); + ASSERT_EQUALS(clientResponse.getErrDetailsAt(0)->getIndex(), 1); } // Multi-op targeting test where each op goes to both shards and there's an error on one op on one @@ -982,11 +986,11 @@ TEST_F(BatchWriteOpTest, MultiOpPartialSingleShardErrorOrdered) { ASSERT_EQUALS(clientResponse.getN(), 1); ASSERT(clientResponse.isErrDetailsSet()); ASSERT_EQUALS(clientResponse.sizeErrDetails(), 1u); - ASSERT_EQUALS(clientResponse.getErrDetailsAt(0).getStatus().code(), - response.getErrDetailsAt(0).getStatus().code()); - ASSERT_EQUALS(clientResponse.getErrDetailsAt(0).getStatus().reason(), - response.getErrDetailsAt(0).getStatus().reason()); - ASSERT_EQUALS(clientResponse.getErrDetailsAt(0).getIndex(), 0); + ASSERT_EQUALS(clientResponse.getErrDetailsAt(0)->toStatus().code(), + response.getErrDetailsAt(0)->toStatus().code()); + ASSERT_EQUALS(clientResponse.getErrDetailsAt(0)->toStatus().reason(), + response.getErrDetailsAt(0)->toStatus().reason()); + ASSERT_EQUALS(clientResponse.getErrDetailsAt(0)->getIndex(), 0); } // @@ -1144,7 +1148,7 @@ TEST_F(BatchWriteOpTest, MultiOpFailedTargetOrdered) { ASSERT_EQUALS(clientResponse.getN(), 1); ASSERT(clientResponse.isErrDetailsSet()); ASSERT_EQUALS(clientResponse.sizeErrDetails(), 1u); - ASSERT_EQUALS(clientResponse.getErrDetailsAt(0).getIndex(), 1); + ASSERT_EQUALS(clientResponse.getErrDetailsAt(0)->getIndex(), 1); } // Targeting failure on second op in batch op (unordered) @@ -1196,7 +1200,7 @@ TEST_F(BatchWriteOpTest, MultiOpFailedTargetUnordered) { ASSERT_EQUALS(clientResponse.getN(), 2); ASSERT(clientResponse.isErrDetailsSet()); ASSERT_EQUALS(clientResponse.sizeErrDetails(), 1u); - ASSERT_EQUALS(clientResponse.getErrDetailsAt(0).getIndex(), 1); + ASSERT_EQUALS(clientResponse.getErrDetailsAt(0)->getIndex(), 1); } // Batch failure (ok : 0) reported in a multi-op batch (ordered). Expect this gets translated down @@ -1242,8 +1246,8 @@ TEST_F(BatchWriteOpTest, MultiOpFailedBatchOrdered) { ASSERT_EQUALS(clientResponse.getN(), 1); ASSERT(clientResponse.isErrDetailsSet()); ASSERT_EQUALS(clientResponse.sizeErrDetails(), 1u); - ASSERT_EQUALS(clientResponse.getErrDetailsAt(0).getIndex(), 1); - ASSERT_EQUALS(clientResponse.getErrDetailsAt(0).getStatus().code(), response.toStatus().code()); + ASSERT_EQUALS(clientResponse.getErrDetailsAt(0)->getIndex(), 1); + ASSERT_EQUALS(clientResponse.getErrDetailsAt(0)->toStatus().code(), response.toStatus().code()); } // Batch failure (ok : 0) reported in a multi-op batch (unordered). Expect this gets translated down @@ -1296,10 +1300,10 @@ TEST_F(BatchWriteOpTest, MultiOpFailedBatchUnordered) { ASSERT_EQUALS(clientResponse.getN(), 1); ASSERT(clientResponse.isErrDetailsSet()); ASSERT_EQUALS(clientResponse.sizeErrDetails(), 2u); - ASSERT_EQUALS(clientResponse.getErrDetailsAt(0).getIndex(), 1); - ASSERT_EQUALS(clientResponse.getErrDetailsAt(0).getStatus().code(), response.toStatus().code()); - ASSERT_EQUALS(clientResponse.getErrDetailsAt(1).getIndex(), 2); - ASSERT_EQUALS(clientResponse.getErrDetailsAt(1).getStatus().code(), response.toStatus().code()); + ASSERT_EQUALS(clientResponse.getErrDetailsAt(0)->getIndex(), 1); + ASSERT_EQUALS(clientResponse.getErrDetailsAt(0)->toStatus().code(), response.toStatus().code()); + ASSERT_EQUALS(clientResponse.getErrDetailsAt(1)->getIndex(), 2); + ASSERT_EQUALS(clientResponse.getErrDetailsAt(1)->toStatus().code(), response.toStatus().code()); } // Batch aborted (ordered). Expect this gets translated down into write error for first affected @@ -1329,7 +1333,8 @@ TEST_F(BatchWriteOpTest, MultiOpAbortOrdered) { batchOp.noteBatchResponse(*targeted.begin()->second, response, nullptr); ASSERT(!batchOp.isFinished()); - write_ops::WriteError abortError(0, {ErrorCodes::UnknownError, "mock abort"}); + WriteErrorDetail abortError; + abortError.setStatus({ErrorCodes::UnknownError, "mock abort"}); batchOp.abortBatch(abortError); ASSERT(batchOp.isFinished()); @@ -1340,9 +1345,9 @@ TEST_F(BatchWriteOpTest, MultiOpAbortOrdered) { ASSERT_EQUALS(clientResponse.getN(), 1); ASSERT(clientResponse.isErrDetailsSet()); ASSERT_EQUALS(clientResponse.sizeErrDetails(), 1u); - ASSERT_EQUALS(clientResponse.getErrDetailsAt(0).getIndex(), 1); - ASSERT_EQUALS(clientResponse.getErrDetailsAt(0).getStatus().code(), - abortError.getStatus().code()); + ASSERT_EQUALS(clientResponse.getErrDetailsAt(0)->getIndex(), 1); + ASSERT_EQUALS(clientResponse.getErrDetailsAt(0)->toStatus().code(), + abortError.toStatus().code()); } // Batch aborted (unordered). Expect this gets translated down into write errors for all affected @@ -1367,7 +1372,8 @@ TEST_F(BatchWriteOpTest, MultiOpAbortUnordered) { BatchWriteOp batchOp(_opCtx, request); - write_ops::WriteError abortError(0, {ErrorCodes::UnknownError, "mock abort"}); + WriteErrorDetail abortError; + abortError.setStatus({ErrorCodes::UnknownError, "mock abort"}); batchOp.abortBatch(abortError); ASSERT(batchOp.isFinished()); @@ -1378,12 +1384,12 @@ TEST_F(BatchWriteOpTest, MultiOpAbortUnordered) { ASSERT_EQUALS(clientResponse.getN(), 0); ASSERT(clientResponse.isErrDetailsSet()); ASSERT_EQUALS(clientResponse.sizeErrDetails(), 2u); - ASSERT_EQUALS(clientResponse.getErrDetailsAt(0).getIndex(), 0); - ASSERT_EQUALS(clientResponse.getErrDetailsAt(0).getStatus().code(), - abortError.getStatus().code()); - ASSERT_EQUALS(clientResponse.getErrDetailsAt(1).getIndex(), 1); - ASSERT_EQUALS(clientResponse.getErrDetailsAt(1).getStatus().code(), - abortError.getStatus().code()); + ASSERT_EQUALS(clientResponse.getErrDetailsAt(0)->getIndex(), 0); + ASSERT_EQUALS(clientResponse.getErrDetailsAt(0)->toStatus().code(), + abortError.toStatus().code()); + ASSERT_EQUALS(clientResponse.getErrDetailsAt(1)->getIndex(), 1); + ASSERT_EQUALS(clientResponse.getErrDetailsAt(1)->toStatus().code(), + abortError.toStatus().code()); } // Multi-op targeting test where each op goes to both shards and both return a write concern error @@ -1648,7 +1654,7 @@ TEST_F(BatchWriteOpTransactionTest, ThrowTargetingErrorsInTransaction_Delete) { ASSERT(response.isErrDetailsSet()); ASSERT_GT(response.sizeErrDetails(), 0u); - ASSERT_EQ(ErrorCodes::UnknownError, response.getErrDetailsAt(0).getStatus().code()); + ASSERT_EQ(ErrorCodes::UnknownError, response.getErrDetailsAt(0)->toStatus().code()); } TEST_F(BatchWriteOpTransactionTest, ThrowTargetingErrorsInTransaction_Update) { @@ -1677,7 +1683,7 @@ TEST_F(BatchWriteOpTransactionTest, ThrowTargetingErrorsInTransaction_Update) { ASSERT(response.isErrDetailsSet()); ASSERT_GT(response.sizeErrDetails(), 0u); - ASSERT_EQ(ErrorCodes::UnknownError, response.getErrDetailsAt(0).getStatus().code()); + ASSERT_EQ(ErrorCodes::UnknownError, response.getErrDetailsAt(0)->toStatus().code()); } } // namespace diff --git a/src/mongo/s/write_ops/batched_command_response.cpp b/src/mongo/s/write_ops/batched_command_response.cpp index 21ab3973070..256f2e0767d 100644 --- a/src/mongo/s/write_ops/batched_command_response.cpp +++ b/src/mongo/s/write_ops/batched_command_response.cpp @@ -40,13 +40,17 @@ namespace mongo { -MONGO_INIT_REGISTER_ERROR_EXTRA_INFO(MultipleErrorsOccurredInfo); +using std::string; +using std::unique_ptr; + +using str::stream; const BSONField BatchedCommandResponse::n("n", 0); const BSONField BatchedCommandResponse::nModified("nModified", 0); const BSONField> BatchedCommandResponse::upsertDetails( "upserted"); const BSONField BatchedCommandResponse::electionId("electionId"); +const BSONField> BatchedCommandResponse::writeErrors("writeErrors"); const BSONField BatchedCommandResponse::writeConcernError( "writeConcernError"); const BSONField> BatchedCommandResponse::errorLabels("errorLabels"); @@ -93,31 +97,41 @@ BSONObj BatchedCommandResponse::toBSON() const { if (_isElectionIdSet) builder.appendOID(electionId(), const_cast(&_electionId)); - if (_writeErrors) { - auto truncateErrorMessage = [errorCount = size_t(0), - errorSize = size_t(0)](StringData rawMessage) mutable { + if (_writeErrorDetails.get()) { + auto errorMessage = [errorCount = size_t(0), + errorSize = size_t(0)](StringData rawMessage) mutable { // Start truncating error messages once both of these limits are exceeded. constexpr size_t kErrorSizeTruncationMin = 1024 * 1024; constexpr size_t kErrorCountTruncationMin = 2; if (errorSize >= kErrorSizeTruncationMin && errorCount >= kErrorCountTruncationMin) { - return true; + return ""_sd; } errorCount++; errorSize += rawMessage.size(); - return false; + return rawMessage; }; - BSONArrayBuilder errDetailsBuilder( - builder.subarrayStart(write_ops::WriteCommandReplyBase::kWriteErrorsFieldName)); - for (auto&& writeError : *_writeErrors) { - if (truncateErrorMessage(writeError.getStatus().reason())) { - write_ops::WriteError truncatedError(writeError.getIndex(), - writeError.getStatus().withReason("")); - errDetailsBuilder.append(truncatedError.serialize()); - } else { - errDetailsBuilder.append(writeError.serialize()); - } + BSONArrayBuilder errDetailsBuilder(builder.subarrayStart(writeErrors())); + for (auto&& writeError : *_writeErrorDetails) { + BSONObjBuilder errDetailsDocument(errDetailsBuilder.subobjStart()); + + if (writeError->isIndexSet()) + errDetailsDocument.append(WriteErrorDetail::index(), writeError->getIndex()); + + auto status = writeError->toStatus(); + errDetailsDocument.append(WriteErrorDetail::errCode(), status.code()); + errDetailsDocument.append(WriteErrorDetail::errCodeName(), status.codeString()); + errDetailsDocument.append(WriteErrorDetail::errMessage(), + errorMessage(status.reason())); + if (auto extra = status.extraInfo()) + extra->serialize( + &errDetailsDocument); // TODO consider extra info size for truncation. + + // Only set 'errInfo' if it hasn't been added by serializing 'extra'. + if (writeError->isErrInfoSet() && + !errDetailsDocument.hasField(WriteErrorDetail::errInfo())) + errDetailsDocument.append(WriteErrorDetail::errInfo(), writeError->getErrInfo()); } errDetailsBuilder.done(); } @@ -133,7 +147,7 @@ BSONObj BatchedCommandResponse::toBSON() const { return builder.obj(); } -bool BatchedCommandResponse::parseBSON(const BSONObj& source, std::string* errMsg) { +bool BatchedCommandResponse::parseBSON(const BSONObj& source, string* errMsg) { clear(); std::string dummy; @@ -203,14 +217,11 @@ bool BatchedCommandResponse::parseBSON(const BSONObj& source, std::string* errMs return false; _isElectionIdSet = fieldState == FieldParser::FIELD_SET; - if (auto writeErrorsElem = source[write_ops::WriteCommandReplyBase::kWriteErrorsFieldName]) { - for (auto writeError : writeErrorsElem.Array()) { - if (!_writeErrors) - _writeErrors.emplace(); - _writeErrors->emplace_back(write_ops::WriteError::parse(writeError.Obj())); - } - } - + std::vector* tempErrDetails = nullptr; + fieldState = FieldParser::extract(source, writeErrors, &tempErrDetails, errMsg); + if (fieldState == FieldParser::FIELD_INVALID) + return false; + _writeErrorDetails.reset(tempErrDetails); WriteConcernErrorDetail* wcError = nullptr; fieldState = FieldParser::extract(source, writeConcernError, &wcError, errMsg); if (fieldState == FieldParser::FIELD_INVALID) @@ -257,7 +268,14 @@ void BatchedCommandResponse::clear() { _electionId = OID(); _isElectionIdSet = false; - _writeErrors.reset(); + if (_writeErrorDetails.get()) { + for (std::vector::const_iterator it = _writeErrorDetails->begin(); + it != _writeErrorDetails->end(); + ++it) { + delete *it; + }; + _writeErrorDetails.reset(); + } _wcErrDetails.reset(); } @@ -299,7 +317,7 @@ void BatchedCommandResponse::setUpsertDetails( for (std::vector::const_iterator it = upsertDetails.begin(); it != upsertDetails.end(); ++it) { - std::unique_ptr tempBatchedUpsertDetail(new BatchedUpsertDetail); + unique_ptr tempBatchedUpsertDetail(new BatchedUpsertDetail); (*it)->cloneTo(tempBatchedUpsertDetail.get()); addToUpsertDetails(tempBatchedUpsertDetail.release()); } @@ -371,39 +389,42 @@ OID BatchedCommandResponse::getElectionId() const { return _electionId; } -void BatchedCommandResponse::addToErrDetails(write_ops::WriteError error) { - if (!_writeErrors) - _writeErrors.emplace(); - _writeErrors->emplace_back(std::move(error)); +void BatchedCommandResponse::addToErrDetails(WriteErrorDetail* errDetails) { + if (_writeErrorDetails.get() == nullptr) { + _writeErrorDetails.reset(new std::vector); + } + _writeErrorDetails->push_back(errDetails); } void BatchedCommandResponse::unsetErrDetails() { - _writeErrors.reset(); + if (_writeErrorDetails.get() != nullptr) { + for (std::vector::iterator it = _writeErrorDetails->begin(); + it != _writeErrorDetails->end(); + ++it) { + delete *it; + } + _writeErrorDetails.reset(); + } } bool BatchedCommandResponse::isErrDetailsSet() const { - return _writeErrors.is_initialized(); + return _writeErrorDetails.get() != nullptr; } size_t BatchedCommandResponse::sizeErrDetails() const { - dassert(isErrDetailsSet()); - return _writeErrors->size(); + dassert(_writeErrorDetails.get()); + return _writeErrorDetails->size(); } -std::vector& BatchedCommandResponse::getErrDetails() { - dassert(isErrDetailsSet()); - return *_writeErrors; +const std::vector& BatchedCommandResponse::getErrDetails() const { + dassert(_writeErrorDetails.get()); + return *_writeErrorDetails; } -const std::vector& BatchedCommandResponse::getErrDetails() const { - dassert(isErrDetailsSet()); - return *_writeErrors; -} - -const write_ops::WriteError& BatchedCommandResponse::getErrDetailsAt(size_t pos) const { - dassert(isErrDetailsSet()); - dassert(pos < _writeErrors->size()); - return _writeErrors->at(pos); +const WriteErrorDetail* BatchedCommandResponse::getErrDetailsAt(size_t pos) const { + dassert(_writeErrorDetails.get()); + dassert(_writeErrorDetails->size() > pos); + return _writeErrorDetails->at(pos); } void BatchedCommandResponse::setWriteConcernError(WriteConcernErrorDetail* error) { @@ -424,7 +445,7 @@ Status BatchedCommandResponse::toStatus() const { } if (isErrDetailsSet()) { - return getErrDetails().front().getStatus(); + return getErrDetails().front()->toStatus(); } if (isWriteConcernErrorSet()) { @@ -450,17 +471,4 @@ const std::vector& BatchedCommandResponse::getRetriedStmtIds() const { return _retriedStmtIds; } -std::shared_ptr MultipleErrorsOccurredInfo::parse(const BSONObj& obj) { - // The server never receives this error as a response from another node, so there is never - // need to parse it. - uasserted(645200, - "The MultipleErrorsOccurred error should never be used for intra-cluster " - "communication"); -} - -void MultipleErrorsOccurredInfo::serialize(BSONObjBuilder* bob) const { - BSONObjBuilder errInfoBuilder(bob->subobjStart(write_ops::WriteError::kErrInfoFieldName)); - errInfoBuilder.append("causedBy", _arr); -} - } // namespace mongo diff --git a/src/mongo/s/write_ops/batched_command_response.h b/src/mongo/s/write_ops/batched_command_response.h index d76dbcbe2d8..0883f69db38 100644 --- a/src/mongo/s/write_ops/batched_command_response.h +++ b/src/mongo/s/write_ops/batched_command_response.h @@ -32,10 +32,10 @@ #include "mongo/base/string_data.h" #include "mongo/db/jsobj.h" #include "mongo/db/logical_session_id.h" -#include "mongo/db/ops/write_ops.h" #include "mongo/db/repl/optime.h" #include "mongo/rpc/write_concern_error_detail.h" #include "mongo/s/write_ops/batched_upsert_detail.h" +#include "mongo/s/write_ops/write_error_detail.h" namespace mongo { @@ -52,6 +52,7 @@ public: static const BSONField nModified; static const BSONField> upsertDetails; static const BSONField electionId; + static const BSONField> writeErrors; static const BSONField writeConcernError; static const BSONField> errorLabels; static const BSONField> retriedStmtIds; @@ -113,13 +114,12 @@ public: OID getElectionId() const; // errDetails ownership is transferred to here. - void addToErrDetails(write_ops::WriteError error); + void addToErrDetails(WriteErrorDetail* errDetails); void unsetErrDetails(); bool isErrDetailsSet() const; std::size_t sizeErrDetails() const; - std::vector& getErrDetails(); - const std::vector& getErrDetails() const; - const write_ops::WriteError& getErrDetailsAt(std::size_t pos) const; + const std::vector& getErrDetails() const; + const WriteErrorDetail* getErrDetailsAt(std::size_t pos) const; void setWriteConcernError(WriteConcernErrorDetail* error); bool isWriteConcernErrorSet() const; @@ -165,7 +165,7 @@ private: bool _isElectionIdSet; // (O) Array of item-level error information - boost::optional> _writeErrors; + std::unique_ptr> _writeErrorDetails; // (O) errors that occurred while trying to satisfy the write concern. std::unique_ptr _wcErrDetails; @@ -177,22 +177,4 @@ private: std::vector _retriedStmtIds; }; -/** - * Error, which is very specific to the batch write commands execution and should never be used - * internally between the cluster nodes. Indicates that more than one type of error occurred while - * executing a batch write command and contains the details for each type. - */ -class MultipleErrorsOccurredInfo final : public ErrorExtraInfo { -public: - static constexpr auto code = ErrorCodes::MultipleErrorsOccurred; - - MultipleErrorsOccurredInfo(BSONArray arr) : _arr(std::move(arr)) {} - - static std::shared_ptr parse(const BSONObj& obj); - void serialize(BSONObjBuilder* bob) const; - -private: - BSONArray _arr; -}; - } // namespace mongo diff --git a/src/mongo/s/write_ops/batched_command_response_test.cpp b/src/mongo/s/write_ops/batched_command_response_test.cpp index 55fc49db16f..509af17d0db 100644 --- a/src/mongo/s/write_ops/batched_command_response_test.cpp +++ b/src/mongo/s/write_ops/batched_command_response_test.cpp @@ -27,6 +27,8 @@ * it in the license file. */ +#include "mongo/platform/basic.h" + #include "mongo/db/jsobj.h" #include "mongo/db/ops/write_ops.h" #include "mongo/s/stale_exception.h" @@ -36,27 +38,35 @@ namespace mongo { namespace { -TEST(BatchedCommandResponseTest, Basic) { - BSONArray writeErrorsArray( - BSON_ARRAY(BSON("index" << 0 << "code" << ErrorCodes::IndexNotFound << "errmsg" - << "index 0 failed") - << BSON("index" << 1 << "code" << ErrorCodes::InvalidNamespace << "errmsg" - << "index 1 failed too"))); - - BSONObj writeConcernError(BSON("code" << ErrorCodes::UnknownError << "codeName" - << "UnknownError" - << "errmsg" - << "norepl" - << "errInfo" << BSON("a" << 1))); +TEST(BatchedCommandResponse, Basic) { + BSONArray writeErrorsArray = BSON_ARRAY( + BSON(WriteErrorDetail::index(0) << WriteErrorDetail::errCode(ErrorCodes::IndexNotFound) + << WriteErrorDetail::errCodeName("IndexNotFound") + << WriteErrorDetail::errMessage("index 0 failed") + << WriteErrorDetail::errInfo(BSON("more info" << 1))) + << BSON(WriteErrorDetail::index(1) + << WriteErrorDetail::errCode(ErrorCodes::InvalidNamespace) + << WriteErrorDetail::errCodeName("InvalidNamespace") + << WriteErrorDetail::errMessage("index 1 failed too") + << WriteErrorDetail::errInfo(BSON("more info" << 1)))); + + BSONObj writeConcernError( + BSON("code" << 8 << "codeName" << ErrorCodes::errorString(ErrorCodes::Error(8)) << "errmsg" + << "norepl" + << "errInfo" << BSON("a" << 1))); + + auto retriedStmtIds = BSON_ARRAY(1 << 3); BSONObj origResponseObj = - BSON("n" << 0 << "opTime" << mongo::Timestamp(1ULL) << "writeErrors" << writeErrorsArray - << "writeConcernError" << writeConcernError << "retriedStmtIds" - << BSON_ARRAY(1 << 3) << "ok" << 1.0); + BSON(BatchedCommandResponse::n(0) + << "opTime" << mongo::Timestamp(1ULL) << BatchedCommandResponse::writeErrors() + << writeErrorsArray << BatchedCommandResponse::writeConcernError() << writeConcernError + << BatchedCommandResponse::retriedStmtIds() << retriedStmtIds << "ok" << 1.0); std::string errMsg; BatchedCommandResponse response; - ASSERT_TRUE(response.parseBSON(origResponseObj, &errMsg)); + bool ok = response.parseBSON(origResponseObj, &errMsg); + ASSERT_TRUE(ok); ASSERT(response.areRetriedStmtIdsSet()); ASSERT_EQ(response.getRetriedStmtIds().size(), 2); @@ -64,78 +74,22 @@ TEST(BatchedCommandResponseTest, Basic) { ASSERT_EQ(response.getRetriedStmtIds()[1], 3); BSONObj genResponseObj = BSONObjBuilder(response.toBSON()).append("ok", 1.0).obj(); - ASSERT_BSONOBJ_EQ(origResponseObj, genResponseObj); -} - -TEST(BatchedCommandResponseTest, StaleErrorAsStaleShardVersionCompatibility) { - OID epoch = OID::gen(); - - StaleConfigInfo staleInfo(NamespaceString("TestDB.TestColl"), - ChunkVersion(1, 0, epoch, Timestamp(100, 0)), - ChunkVersion(2, 0, epoch, Timestamp(100, 0)), - ShardId("TestShard")); - BSONObjBuilder builder; - staleInfo.serialize(&builder); - - BSONArray writeErrorsArray( - BSON_ARRAY(BSON("index" << 0 << "code" << ErrorCodes::StaleShardVersion << "errmsg" - << "StaleShardVersion error" - << "errInfo" << builder.obj()) - << BSON("index" << 1 << "code" << ErrorCodes::InvalidNamespace << "errmsg" - << "index 1 failed too"))); - BSONObj origResponseObj = - BSON("n" << 0 << "opTime" << mongo::Timestamp(1ULL) << "writeErrors" << writeErrorsArray - << "retriedStmtIds" << BSON_ARRAY(1 << 3) << "ok" << 1.0); - - std::string errMsg; - BatchedCommandResponse response; - ASSERT_TRUE(response.parseBSON(origResponseObj, &errMsg)); - ASSERT_EQ(0, response.getErrDetailsAt(0).getIndex()); - ASSERT_EQ(ErrorCodes::StaleConfig, response.getErrDetailsAt(0).getStatus().code()); - auto extraInfo = response.getErrDetailsAt(0).getStatus().extraInfo(); - ASSERT_EQ(staleInfo.getVersionReceived(), extraInfo->getVersionReceived()); - ASSERT_EQ(*staleInfo.getVersionWanted(), *extraInfo->getVersionWanted()); - ASSERT_EQ(staleInfo.getShardId(), extraInfo->getShardId()); + ASSERT_EQUALS(0, genResponseObj.woCompare(origResponseObj)) + << "\nparsed: " << genResponseObj // + << "\noriginal: " << origResponseObj; } -TEST(BatchedCommandResponseTest, StaleErrorAsStaleConfigCompatibility) { - OID epoch = OID::gen(); - - StaleConfigInfo staleInfo(NamespaceString("TestDB.TestColl"), - ChunkVersion(1, 0, epoch, Timestamp(100, 0)), - ChunkVersion(2, 0, epoch, Timestamp(100, 0)), - ShardId("TestShard")); - BSONObjBuilder builder(BSON("index" << 0 << "code" << ErrorCodes::StaleConfig << "errmsg" - << "StaleConfig error")); - staleInfo.serialize(&builder); - - BSONArray writeErrorsArray(BSON_ARRAY( - builder.obj() << BSON("index" << 1 << "code" << ErrorCodes::InvalidNamespace << "errmsg" - << "index 1 failed too"))); - - BSONObj origResponseObj = - BSON("n" << 0 << "opTime" << mongo::Timestamp(1ULL) << "writeErrors" << writeErrorsArray - << "retriedStmtIds" << BSON_ARRAY(1 << 3) << "ok" << 1.0); - - std::string errMsg; - BatchedCommandResponse response; - ASSERT_TRUE(response.parseBSON(origResponseObj, &errMsg)); - ASSERT_EQ(0, response.getErrDetailsAt(0).getIndex()); - ASSERT_EQ(ErrorCodes::StaleConfig, response.getErrDetailsAt(0).getStatus().code()); - auto extraInfo = response.getErrDetailsAt(0).getStatus().extraInfo(); - ASSERT_EQ(staleInfo.getVersionReceived(), extraInfo->getVersionReceived()); - ASSERT_EQ(*staleInfo.getVersionWanted(), *extraInfo->getVersionWanted()); - ASSERT_EQ(staleInfo.getShardId(), extraInfo->getShardId()); -} - -TEST(BatchedCommandResponseTest, TooManySmallErrors) { +TEST(BatchedCommandResponse, TooManySmallErrors) { BatchedCommandResponse response; const auto bigstr = std::string(1024, 'x'); for (int i = 0; i < 100'000; i++) { - response.addToErrDetails(write_ops::WriteError(i, {ErrorCodes::BadValue, bigstr})); + auto errDetail = std::make_unique(); + errDetail->setIndex(i); + errDetail->setStatus({ErrorCodes::BadValue, bigstr}); + response.addToErrDetails(errDetail.release()); } response.setStatus(Status::OK()); @@ -157,15 +111,18 @@ TEST(BatchedCommandResponseTest, TooManySmallErrors) { } } -TEST(BatchedCommandResponseTest, TooManyBigErrors) { +TEST(BatchedCommandResponse, TooManyBigErrors) { BatchedCommandResponse response; const auto bigstr = std::string(2'000'000, 'x'); const auto smallstr = std::string(10, 'x'); for (int i = 0; i < 100'000; i++) { - response.addToErrDetails(write_ops::WriteError( - i, {ErrorCodes::BadValue, i < 10 ? bigstr : smallstr /* Don't waste too much RAM */})); + auto errDetail = std::make_unique(); + errDetail->setIndex(i); + errDetail->setStatus({ErrorCodes::BadValue, // + i < 10 ? bigstr : smallstr}); // Don't waste too much RAM. + response.addToErrDetails(errDetail.release()); } response.setStatus(Status::OK()); @@ -187,7 +144,52 @@ TEST(BatchedCommandResponseTest, TooManyBigErrors) { } } -TEST(BatchedCommandResponseTest, CompatibilityFromWriteErrorToBatchCommandResponse) { +TEST(BatchedCommandResponse, NoDuplicateErrInfo) { + auto verifySingleErrInfo = [](const BSONObj& obj) { + size_t errInfo = 0; + for (auto elem : obj) { + if (elem.fieldNameStringData() == WriteErrorDetail::errInfo()) { + ++errInfo; + } + } + ASSERT_EQ(errInfo, 1) << "serialized obj with duplicate errInfo " << obj.toString(); + }; + + // Construct a WriteErrorDetail. + Status s(ErrorCodes::DocumentValidationFailure, + "Document failed validation", + BSON("errInfo" << BSON("detailed" + << "error message"))); + BSONObjBuilder b; + s.serialize(&b); + WriteErrorDetail wed; + wed.setIndex(0); + + // Verify it produces a single errInfo. + wed.parseBSON(b.obj(), nullptr); + BSONObj bsonWed = wed.toBSON(); + verifySingleErrInfo(bsonWed); + + BSONObjBuilder bcrBuilder; + bcrBuilder.append("ok", 1); + bcrBuilder.append("writeErrors", BSON_ARRAY(bsonWed)); + + // Construct a 'BatchedCommandResponse' using the above 'bsonWed'. + BatchedCommandResponse bcr; + bcr.parseBSON(bcrBuilder.obj(), nullptr); + BSONObj bsonBcr = bcr.toBSON(); + auto writeErrors = bsonBcr[BatchedCommandResponse::writeErrors()]; + ASSERT(!writeErrors.eoo()); + ASSERT_EQ(writeErrors.type(), BSONType::Array); + + // Verify that the entry in the 'writeErrors' array produces one 'errInfo' field. + for (auto&& elem : writeErrors.Array()) { + ASSERT_EQ(elem.type(), BSONType::Object); + verifySingleErrInfo(elem.embeddedObject()); + } +} + +TEST(BatchedCommandResponse, CompatibilityFromWriteErrorToBatchCommandResponse) { ChunkVersion versionReceived(1, 0, OID::gen(), Timestamp(2, 0)); write_ops::UpdateCommandReply reply; @@ -204,13 +206,14 @@ TEST(BatchedCommandResponseTest, CompatibilityFromWriteErrorToBatchCommandRespon BatchedCommandResponse response; ASSERT_TRUE(response.parseBSON(reply.toBSON(), nullptr)); ASSERT_EQ(1U, response.getErrDetails().size()); - ASSERT_EQ(ErrorCodes::StaleConfig, response.getErrDetailsAt(0).getStatus().code()); - ASSERT_EQ("Test stale config", response.getErrDetailsAt(0).getStatus().reason()); - auto staleInfo = response.getErrDetailsAt(0).getStatus().extraInfo(); - ASSERT_EQ("TestDB.TestColl", staleInfo->getNss().ns()); - ASSERT_EQ(versionReceived, staleInfo->getVersionReceived()); - ASSERT(!staleInfo->getVersionWanted()); - ASSERT_EQ(ShardId("TestShard"), staleInfo->getShardId()); + ASSERT_EQ(ErrorCodes::StaleShardVersion, response.getErrDetailsAt(0)->toStatus().code()); + ASSERT_EQ("Test stale config", response.getErrDetailsAt(0)->toStatus().reason()); + auto staleInfo = + StaleConfigInfo::parseFromCommandError(response.getErrDetailsAt(0)->getErrInfo()); + ASSERT_EQ("TestDB.TestColl", staleInfo.getNss().ns()); + ASSERT_EQ(versionReceived, staleInfo.getVersionReceived()); + ASSERT(!staleInfo.getVersionWanted()); + ASSERT_EQ(ShardId("TestShard"), staleInfo.getShardId()); } } // namespace diff --git a/src/mongo/s/write_ops/write_error_detail.cpp b/src/mongo/s/write_ops/write_error_detail.cpp new file mode 100644 index 00000000000..0c4587b82c0 --- /dev/null +++ b/src/mongo/s/write_ops/write_error_detail.cpp @@ -0,0 +1,187 @@ +/** + * Copyright (C) 2018-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/s/write_ops/write_error_detail.h" + +#include "mongo/db/field_parser.h" +#include "mongo/util/str.h" + +namespace mongo { + +using std::string; + +const BSONField WriteErrorDetail::index("index"); +const BSONField WriteErrorDetail::errCode("code"); +const BSONField WriteErrorDetail::errCodeName("codeName"); +const BSONField WriteErrorDetail::errInfo("errInfo"); +const BSONField WriteErrorDetail::errMessage("errmsg"); + +WriteErrorDetail::WriteErrorDetail() { + clear(); +} + +bool WriteErrorDetail::isValid(std::string* errMsg) const { + std::string dummy; + if (errMsg == nullptr) { + errMsg = &dummy; + } + + // All the mandatory fields must be present. + if (!_isIndexSet) { + *errMsg = str::stream() << "missing " << index.name() << " field"; + return false; + } + + // This object only makes sense when the status isn't OK + if (_status.isOK()) { + *errMsg = "WriteErrorDetail shouldn't have OK status."; + return false; + } + + return true; +} + +BSONObj WriteErrorDetail::toBSON() const { + BSONObjBuilder builder; + + if (_isIndexSet) + builder.append(index(), _index); + + invariant(!_status.isOK()); + builder.append(errCode(), _status.code()); + builder.append(errCodeName(), _status.codeString()); + builder.append(errMessage(), _status.reason()); + if (auto extra = _status.extraInfo()) + extra->serialize(&builder); + + // Only set 'errInfo' if it hasn't been added by serializing 'extra'. + if (_isErrInfoSet && !builder.hasField(errInfo())) + builder.append(errInfo(), _errInfo); + + return builder.obj(); +} + +bool WriteErrorDetail::parseBSON(const BSONObj& source, string* errMsg) { + clear(); + + std::string dummy; + if (!errMsg) + errMsg = &dummy; + + FieldParser::FieldState fieldState; + fieldState = FieldParser::extract(source, index, &_index, errMsg); + if (fieldState == FieldParser::FIELD_INVALID) + return false; + _isIndexSet = fieldState == FieldParser::FIELD_SET; + + int errCodeValue; + fieldState = FieldParser::extract(source, errCode, &errCodeValue, errMsg); + if (fieldState == FieldParser::FIELD_INVALID) + return false; + bool haveStatus = fieldState == FieldParser::FIELD_SET; + std::string errMsgValue; + fieldState = FieldParser::extract(source, errMessage, &errMsgValue, errMsg); + if (fieldState == FieldParser::FIELD_INVALID) + return false; + haveStatus = haveStatus && fieldState == FieldParser::FIELD_SET; + if (!haveStatus) { + *errMsg = "missing code or errmsg field"; + return false; + } + _status = Status(ErrorCodes::Error(errCodeValue), errMsgValue, source); + + fieldState = FieldParser::extract(source, errInfo, &_errInfo, errMsg); + if (fieldState == FieldParser::FIELD_INVALID) + return false; + _isErrInfoSet = fieldState == FieldParser::FIELD_SET; + + return true; +} + +void WriteErrorDetail::clear() { + _index = 0; + _isIndexSet = false; + + _status = Status::OK(); + + _errInfo = BSONObj(); + _isErrInfoSet = false; +} + +void WriteErrorDetail::cloneTo(WriteErrorDetail* other) const { + other->clear(); + + other->_index = _index; + other->_isIndexSet = _isIndexSet; + + other->_status = _status; + + other->_errInfo = _errInfo; + other->_isErrInfoSet = _isErrInfoSet; +} + +std::string WriteErrorDetail::toString() const { + return "implement me"; +} + +void WriteErrorDetail::setIndex(int index) { + _index = index; + _isIndexSet = true; +} + +bool WriteErrorDetail::isIndexSet() const { + return _isIndexSet; +} + +int WriteErrorDetail::getIndex() const { + dassert(_isIndexSet); + return _index; +} + +Status WriteErrorDetail::toStatus() const { + return _status; +} + +void WriteErrorDetail::setErrInfo(const BSONObj& errInfo) { + _errInfo = errInfo.getOwned(); + _isErrInfoSet = true; +} + +bool WriteErrorDetail::isErrInfoSet() const { + return _isErrInfoSet; +} + +const BSONObj& WriteErrorDetail::getErrInfo() const { + dassert(_isErrInfoSet); + return _errInfo; +} + +} // namespace mongo diff --git a/src/mongo/s/write_ops/write_error_detail.h b/src/mongo/s/write_ops/write_error_detail.h new file mode 100644 index 00000000000..6e8c47aa1d6 --- /dev/null +++ b/src/mongo/s/write_ops/write_error_detail.h @@ -0,0 +1,107 @@ +/** + * Copyright (C) 2018-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include +#include + +#include "mongo/base/string_data.h" +#include "mongo/db/jsobj.h" + +namespace mongo { + +/** + * This class represents the layout and content of a insert/update/delete runCommand, + * the response side. + */ +class WriteErrorDetail { +public: + // + // schema declarations + // + + static const BSONField index; + static const BSONField errCode; + static const BSONField errCodeName; + static const BSONField errInfo; + static const BSONField errMessage; + + // + // construction / destruction + // + + WriteErrorDetail(); + + /** Copies all the fields present in 'this' to 'other'. */ + void cloneTo(WriteErrorDetail* other) const; + + // + // bson serializable interface implementation + // + + bool isValid(std::string* errMsg) const; + BSONObj toBSON() const; + bool parseBSON(const BSONObj& source, std::string* errMsg); + void clear(); + std::string toString() const; + + // + // individual field accessors + // + + void setIndex(int index); + bool isIndexSet() const; + int getIndex() const; + + void setStatus(Status status) { + _status = std::move(status); + } + Status toStatus() const; + + void setErrInfo(const BSONObj& errInfo); + bool isErrInfoSet() const; + const BSONObj& getErrInfo() const; + +private: + // Convention: (M)andatory, (O)ptional + + // (M) number of the batch item the error refers to + int _index; + bool _isIndexSet; + + // (M) The code and message of this error. Must be set to a non-OK status. + Status _status = Status::OK(); + + // (O) further details about the batch item error + BSONObj _errInfo; + bool _isErrInfoSet; +}; + +} // namespace mongo diff --git a/src/mongo/s/write_ops/write_op.cpp b/src/mongo/s/write_ops/write_op.cpp index f403714a020..c0288b7080a 100644 --- a/src/mongo/s/write_ops/write_op.cpp +++ b/src/mongo/s/write_ops/write_op.cpp @@ -27,57 +27,17 @@ * it in the license file. */ +#include "mongo/platform/basic.h" + #include "mongo/s/write_ops/write_op.h" #include "mongo/s/transaction_router.h" #include "mongo/util/assert_util.h" namespace mongo { -namespace { - -bool isRetryErrCode(int errCode) { - return errCode == ErrorCodes::StaleShardVersion || errCode == ErrorCodes::StaleConfig || - errCode == ErrorCodes::StaleDbVersion || - errCode == ErrorCodes::ShardCannotRefreshDueToLocksHeld || - errCode == ErrorCodes::TenantMigrationAborted; -} - -bool errorsAllSame(const std::vector& errOps) { - auto errCode = errOps.front()->error->getStatus().code(); - if (std::all_of(++errOps.begin(), errOps.end(), [errCode](const ChildWriteOp* errOp) { - return errOp->error->getStatus().code() == errCode; - })) { - return true; - } - - return false; -} - -// Aggregate a bunch of errors for a single op together -write_ops::WriteError combineOpErrors(const std::vector& errOps) { - // Special case single response or all errors are the same - if (errOps.size() == 1 || errorsAllSame(errOps)) { - return *errOps.front()->error; - } - // Generate the multi-error message below - std::stringstream msg("multiple errors for op : "); - - BSONArrayBuilder errB; - for (std::vector::const_iterator it = errOps.begin(); it != errOps.end(); - ++it) { - const ChildWriteOp* errOp = *it; - if (it != errOps.begin()) - msg << " :: and :: "; - msg << errOp->error->getStatus().reason(); - errB.append(errOp->error->serialize()); - } - - return write_ops::WriteError(errOps.front()->error->getIndex(), - Status(MultipleErrorsOccurredInfo(errB.arr()), msg.str())); -} - -} // namespace +using std::stringstream; +using std::vector; const BatchItemRef& WriteOp::getWriteItem() const { return _itemRef; @@ -87,7 +47,7 @@ WriteOpState WriteOp::getWriteState() const { return _state; } -const write_ops::WriteError& WriteOp::getOpError() const { +const WriteErrorDetail& WriteOp::getOpError() const { dassert(_state == WriteOpState_Error); return *_error; } @@ -146,6 +106,50 @@ size_t WriteOp::getNumTargeted() { return _childOps.size(); } +static bool isRetryErrCode(int errCode) { + return errCode == ErrorCodes::StaleShardVersion || errCode == ErrorCodes::StaleDbVersion || + errCode == ErrorCodes::ShardCannotRefreshDueToLocksHeld || + errCode == ErrorCodes::TenantMigrationAborted; +} + +static bool errorsAllSame(const vector& errOps) { + auto errCode = errOps.front()->error->toStatus().code(); + if (std::all_of(++errOps.begin(), errOps.end(), [errCode](const ChildWriteOp* errOp) { + return errOp->error->toStatus().code() == errCode; + })) { + return true; + } + + return false; +} + +// Aggregate a bunch of errors for a single op together +static void combineOpErrors(const vector& errOps, WriteErrorDetail* error) { + // Special case single response or all errors are the same + if (errOps.size() == 1 || errorsAllSame(errOps)) { + errOps.front()->error->cloneTo(error); + return; + } + + // Generate the multi-error message below + stringstream msg; + msg << "multiple errors for op : "; + + BSONArrayBuilder errB; + for (vector::const_iterator it = errOps.begin(); it != errOps.end(); + ++it) { + const ChildWriteOp* errOp = *it; + if (it != errOps.begin()) + msg << " :: and :: "; + msg << errOp->error->toStatus().reason(); + errB.append(errOp->error->toBSON()); + } + + error->setErrInfo(BSON("causedBy" << errB.arr())); + error->setIndex(errOps.front()->error->getIndex()); + error->setStatus({ErrorCodes::MultipleErrorsOccurred, msg.str()}); +} + /** * This is the core function which aggregates all the results of a write operation on multiple * shards and updates the write operation's state. @@ -170,7 +174,7 @@ void WriteOp::_updateOpState() { childErrors.push_back(&childOp); // Any non-retry error aborts all - if (_inTxn || !isRetryErrCode(childOp.error->getStatus().code())) { + if (_inTxn || !isRetryErrCode(childOp.error->toStatus().code())) { isRetryError = false; } } @@ -179,7 +183,8 @@ void WriteOp::_updateOpState() { if (!childErrors.empty() && isRetryError) { _state = WriteOpState_Ready; } else if (!childErrors.empty()) { - _error = combineOpErrors(childErrors); + _error.reset(new WriteErrorDetail); + combineOpErrors(childErrors, _error.get()); _state = WriteOpState_Error; } else if (hasPendingChild && _inTxn) { // Return early here since this means that there were no errors while in txn @@ -193,14 +198,17 @@ void WriteOp::_updateOpState() { _childOps.clear(); } -void WriteOp::cancelWrites(const write_ops::WriteError* why) { +void WriteOp::cancelWrites(const WriteErrorDetail* why) { invariant(_state == WriteOpState_Pending || _state == WriteOpState_Ready); for (auto& childOp : _childOps) { if (childOp.state == WriteOpState_Pending) { childOp.endpoint.reset(new ShardEndpoint(childOp.pendingWrite->endpoint)); - if (why) - childOp.error = *why; + if (why) { + childOp.error.reset(new WriteErrorDetail); + why->cloneTo(childOp.error.get()); + } + childOp.state = WriteOpState_Cancelled; } } @@ -220,23 +228,24 @@ void WriteOp::noteWriteComplete(const TargetedWrite& targetedWrite) { _updateOpState(); } -void WriteOp::noteWriteError(const TargetedWrite& targetedWrite, - const write_ops::WriteError& error) { +void WriteOp::noteWriteError(const TargetedWrite& targetedWrite, const WriteErrorDetail& error) { const WriteOpRef& ref = targetedWrite.writeOpRef; auto& childOp = _childOps[ref.second]; childOp.pendingWrite = nullptr; childOp.endpoint.reset(new ShardEndpoint(targetedWrite.endpoint)); - childOp.error = error; + childOp.error.reset(new WriteErrorDetail); + error.cloneTo(childOp.error.get()); dassert(ref.first == _itemRef.getItemIndex()); childOp.error->setIndex(_itemRef.getItemIndex()); childOp.state = WriteOpState_Error; _updateOpState(); } -void WriteOp::setOpError(const write_ops::WriteError& error) { +void WriteOp::setOpError(const WriteErrorDetail& error) { dassert(_state == WriteOpState_Ready); - _error = error; + _error.reset(new WriteErrorDetail); + error.cloneTo(_error.get()); _error->setIndex(_itemRef.getItemIndex()); _state = WriteOpState_Error; // No need to updateOpState, set directly diff --git a/src/mongo/s/write_ops/write_op.h b/src/mongo/s/write_ops/write_op.h index 73f6c4ce2a6..1eb9ead8fbf 100644 --- a/src/mongo/s/write_ops/write_op.h +++ b/src/mongo/s/write_ops/write_op.h @@ -34,6 +34,7 @@ #include "mongo/s/ns_targeter.h" #include "mongo/s/write_ops/batched_command_request.h" +#include "mongo/s/write_ops/write_error_detail.h" namespace mongo { @@ -106,7 +107,7 @@ public: * * Can only be used in state _Error */ - const write_ops::WriteError& getOpError() const; + const WriteErrorDetail& getOpError() const; /** * Creates TargetedWrite operations for every applicable shard, which contain the @@ -134,7 +135,7 @@ public: * Can only be called when state is _Pending, or is a no-op if called when the state * is still _Ready (and therefore no writes are pending). */ - void cancelWrites(const write_ops::WriteError* why); + void cancelWrites(const WriteErrorDetail* why); /** * Marks the targeted write as finished for this write op. @@ -150,14 +151,14 @@ public: * As above, one of noteWriteComplete or noteWriteError should be called exactly once for * every TargetedWrite. */ - void noteWriteError(const TargetedWrite& targetedWrite, const write_ops::WriteError& error); + void noteWriteError(const TargetedWrite& targetedWrite, const WriteErrorDetail& error); /** * Sets the error for this write op directly, and forces the state to _Error. * * Should only be used when in state _Ready. */ - void setOpError(const write_ops::WriteError& error); + void setOpError(const WriteErrorDetail& error); private: /** @@ -175,7 +176,7 @@ private: std::vector _childOps; // filled when state == _Error - boost::optional _error; + std::unique_ptr _error; // Whether this write is part of a transaction. const bool _inTxn; @@ -205,7 +206,7 @@ struct ChildWriteOp { std::unique_ptr endpoint; // filled when state == _Error or (optionally) when state == _Cancelled - boost::optional error; + std::unique_ptr error; }; // First value is write item index in the batch, second value is child write op index diff --git a/src/mongo/s/write_ops/write_op_test.cpp b/src/mongo/s/write_ops/write_op_test.cpp index 827688cd6a9..a103ba479c0 100644 --- a/src/mongo/s/write_ops/write_op_test.cpp +++ b/src/mongo/s/write_ops/write_op_test.cpp @@ -42,6 +42,14 @@ namespace { const NamespaceString kNss("foo.bar"); +WriteErrorDetail buildError(int code, const BSONObj& info, const std::string& message) { + WriteErrorDetail error; + error.setStatus({ErrorCodes::Error(code), message}); + error.setErrInfo(info); + + return error; +} + write_ops::DeleteOpEntry buildDelete(const BSONObj& query, bool multi) { write_ops::DeleteOpEntry entry; entry.setQ(query); @@ -83,11 +91,14 @@ TEST_F(WriteOpTest, BasicError) { WriteOp writeOp(BatchItemRef(&request, 0), false); ASSERT_EQUALS(writeOp.getWriteState(), WriteOpState_Ready); - write_ops::WriteError error(0, {ErrorCodes::UnknownError, "some message"}); - writeOp.setOpError(error); + const auto error(buildError(ErrorCodes::UnknownError, BSON("data" << 12345), "some message")); + writeOp.setOpError(error); ASSERT_EQUALS(writeOp.getWriteState(), WriteOpState_Error); - ASSERT_EQUALS(writeOp.getOpError().getStatus(), error.getStatus()); + ASSERT_EQUALS(writeOp.getOpError().toStatus().code(), error.toStatus().code()); + ASSERT_EQUALS(writeOp.getOpError().getErrInfo()["data"].Int(), + error.getErrInfo()["data"].Int()); + ASSERT_EQUALS(writeOp.getOpError().toStatus().reason(), error.toStatus().reason()); } TEST_F(WriteOpTest, TargetSingle) { @@ -225,8 +236,9 @@ TEST_F(WriteOpTest, TargetMultiAllShardsAndErrorSingleChildOp) { ASSERT(ChunkVersion::isIgnoredVersion(*targeted[1]->endpoint.shardVersion)); // Simulate retryable error. - write_ops::WriteError retryableError( - 0, {ErrorCodes::StaleShardVersion, "simulate ssv error for test"}); + WriteErrorDetail retryableError; + retryableError.setIndex(0); + retryableError.setStatus({ErrorCodes::StaleShardVersion, "simulate ssv error for test"}); writeOp.noteWriteError(*targeted[0], retryableError); // State should not change until we have result from all nodes. @@ -260,11 +272,15 @@ TEST_F(WriteOpTest, ErrorSingle) { ASSERT_EQUALS(targeted.size(), 1u); assertEndpointsEqual(targeted.front()->endpoint, endpoint); - write_ops::WriteError error(0, {ErrorCodes::UnknownError, "some message"}); + const auto error(buildError(ErrorCodes::UnknownError, BSON("data" << 12345), "some message")); + writeOp.noteWriteError(*targeted.front(), error); ASSERT_EQUALS(writeOp.getWriteState(), WriteOpState_Error); - ASSERT_EQUALS(writeOp.getOpError().getStatus(), error.getStatus()); + ASSERT_EQUALS(writeOp.getOpError().toStatus().code(), error.toStatus().code()); + ASSERT_EQUALS(writeOp.getOpError().getErrInfo()["data"].Int(), + error.getErrInfo()["data"].Int()); + ASSERT_EQUALS(writeOp.getOpError().toStatus().reason(), error.toStatus().reason()); } // Cancel single targeting test @@ -322,7 +338,8 @@ TEST_F(WriteOpTest, RetrySingleOp) { assertEndpointsEqual(targeted.front()->endpoint, endpoint); // Stale exception - write_ops::WriteError error(0, {ErrorCodes::StaleShardVersion, "some message"}); + const auto error( + buildError(ErrorCodes::StaleShardVersion, BSON("data" << 12345), "some message")); writeOp.noteWriteError(*targeted.front(), error); ASSERT_EQUALS(writeOp.getWriteState(), WriteOpState_Ready); @@ -411,13 +428,13 @@ TEST_F(WriteOpTransactionTest, TargetMultiAllShardsAndErrorSingleChildOp) { ASSERT_EQUALS(targeted[1]->endpoint.shardName, endpointB.shardName); // Simulate retryable error. - write_ops::WriteError retryableError( - 0, {ErrorCodes::StaleShardVersion, "simulate ssv error for test"}); + WriteErrorDetail retryableError; + retryableError.setIndex(0); + retryableError.setStatus({ErrorCodes::StaleShardVersion, "simulate ssv error for test"}); writeOp.noteWriteError(*targeted[0], retryableError); // State should change to error right away even with retryable error when in a transaction. ASSERT_EQUALS(writeOp.getWriteState(), WriteOpState_Error); - ASSERT_EQUALS(writeOp.getOpError().getStatus(), retryableError.getStatus()); } } // namespace -- cgit v1.2.1