summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHaley Connelly <haley.connelly@mongodb.com>2021-05-18 22:54:51 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-06-03 00:03:46 +0000
commit1274f16682fd01d35c98db39b202d7acdf3e554b (patch)
tree5e50c2784b5b308c70ea3b1e60897e70c22e400b
parentbdb1299412486fab396214f5745d1768a56a449f (diff)
downloadmongo-1274f16682fd01d35c98db39b202d7acdf3e554b.tar.gz
SERVER-55686 Prevent resharding coordinator document from exceeding 16MB BSON size limit
-rw-r--r--src/mongo/base/error_codes.yml3
-rw-r--r--src/mongo/db/s/resharding/resharding_coordinator_observer_test.cpp2
-rw-r--r--src/mongo/db/s/resharding/resharding_coordinator_service.cpp19
-rw-r--r--src/mongo/db/s/resharding/resharding_coordinator_test.cpp2
-rw-r--r--src/mongo/db/s/resharding/resharding_donor_service.cpp27
-rw-r--r--src/mongo/db/s/resharding/resharding_donor_service.h11
-rw-r--r--src/mongo/db/s/resharding/resharding_donor_service_test.cpp62
-rw-r--r--src/mongo/db/s/resharding/resharding_recipient_service.cpp22
-rw-r--r--src/mongo/db/s/resharding/resharding_recipient_service.h11
-rw-r--r--src/mongo/db/s/resharding/resharding_recipient_service_test.cpp63
-rw-r--r--src/mongo/db/s/resharding_util.cpp31
-rw-r--r--src/mongo/db/s/resharding_util.h21
12 files changed, 250 insertions, 24 deletions
diff --git a/src/mongo/base/error_codes.yml b/src/mongo/base/error_codes.yml
index 5208c434cb4..98fc37a9558 100644
--- a/src/mongo/base/error_codes.yml
+++ b/src/mongo/base/error_codes.yml
@@ -419,7 +419,6 @@ error_codes:
- {code: 337, name: AuthenticationAbandoned, categories: [InternalOnly]}
- {code: 338, name: ReshardCollectionInProgress}
-
- {code: 339, name: NoSuchReshardCollection}
- {code: 340, name: ReshardCollectionCommitted}
- {code: 341, name: ReshardCollectionAborted}
@@ -442,6 +441,8 @@ error_codes:
- {code: 349, name: KeyPatternShorterThanBound}
+ - {code: 350, name: ReshardCollectionTruncatedError}
+
# Error codes 4000-8999 are reserved.
# Non-sequential error codes (for compatibility only)
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_observer_test.cpp b/src/mongo/db/s/resharding/resharding_coordinator_observer_test.cpp
index a10f63fe08b..8da8ad5fe10 100644
--- a/src/mongo/db/s/resharding/resharding_coordinator_observer_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_coordinator_observer_test.cpp
@@ -51,7 +51,7 @@ protected:
auto coordinatorDoc = ReshardingCoordinatorDocument();
coordinatorDoc.setRecipientShards(std::move(recipients));
coordinatorDoc.setDonorShards(std::move(donors));
- emplaceAbortReasonIfExists(coordinatorDoc, abortReason);
+ emplaceTruncatedAbortReasonIfExists(coordinatorDoc, abortReason);
return coordinatorDoc;
}
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_service.cpp b/src/mongo/db/s/resharding/resharding_coordinator_service.cpp
index 3fdfc2deca6..f53c2a0c7d1 100644
--- a/src/mongo/db/s/resharding/resharding_coordinator_service.cpp
+++ b/src/mongo/db/s/resharding/resharding_coordinator_service.cpp
@@ -885,7 +885,7 @@ void ReshardingCoordinatorExternalStateImpl::removeCoordinatorDocAndReshardingFi
ReshardingCoordinatorDocument updatedCoordinatorDoc = coordinatorDoc;
updatedCoordinatorDoc.setState(CoordinatorStateEnum::kDone);
- emplaceAbortReasonIfExists(updatedCoordinatorDoc, abortReason);
+ emplaceTruncatedAbortReasonIfExists(updatedCoordinatorDoc, abortReason);
ShardingCatalogManager::get(opCtx)->bumpCollectionVersionAndChangeMetadataInTxn(
opCtx,
@@ -1093,12 +1093,6 @@ ReshardingCoordinatorService::ReshardingCoordinator::_runUntilReadyToPersistDeci
return status;
}
- // If the abort cancellation token was triggered, implying that a user ran the abort
- // command, override with the abort error code.
- if (_ctHolder->isAborted()) {
- status = {ErrorCodes::ReshardCollectionAborted, status.reason()};
- }
-
auto nss = _coordinatorDoc.getSourceNss();
LOGV2(4956902,
"Resharding failed",
@@ -1106,6 +1100,15 @@ ReshardingCoordinatorService::ReshardingCoordinator::_runUntilReadyToPersistDeci
"newShardKeyPattern"_attr = _coordinatorDoc.getReshardingKey(),
"error"_attr = status);
+ if (_ctHolder->isAborted()) {
+ // If the abort cancellation token was triggered, implying that a user ran the abort
+ // command, override status with a resharding abort error.
+ //
+ // Note for debugging purposes: Ensure the original error status is recorded in the
+ // logs before replacing it.
+ status = {ErrorCodes::ReshardCollectionAborted, "aborted"};
+ }
+
if (_coordinatorDoc.getState() == CoordinatorStateEnum::kUnused) {
return status;
}
@@ -1573,7 +1576,7 @@ void ReshardingCoordinatorService::ReshardingCoordinator::
updatedCoordinatorDoc.setState(nextState);
emplaceApproxBytesToCopyIfExists(updatedCoordinatorDoc, std::move(approxCopySize));
emplaceCloneTimestampIfExists(updatedCoordinatorDoc, std::move(cloneTimestamp));
- emplaceAbortReasonIfExists(updatedCoordinatorDoc, abortReason);
+ emplaceTruncatedAbortReasonIfExists(updatedCoordinatorDoc, abortReason);
auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
_reshardingCoordinatorExternalState->writeStateTransitionAndCatalogUpdatesThenBumpShardVersions(
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_test.cpp b/src/mongo/db/s/resharding/resharding_coordinator_test.cpp
index 253c97032cb..d235748c3e7 100644
--- a/src/mongo/db/s/resharding/resharding_coordinator_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_coordinator_test.cpp
@@ -673,7 +673,7 @@ protected:
auto expectedCoordinatorDoc = coordinatorDoc;
expectedCoordinatorDoc.setState(CoordinatorStateEnum::kAborting);
auto abortReason = Status{errorCode, "reason to abort"};
- emplaceAbortReasonIfExists(expectedCoordinatorDoc, abortReason);
+ emplaceTruncatedAbortReasonIfExists(expectedCoordinatorDoc, abortReason);
writeStateTransitionUpdateExpectSuccess(operationContext(), expectedCoordinatorDoc);
}
diff --git a/src/mongo/db/s/resharding/resharding_donor_service.cpp b/src/mongo/db/s/resharding/resharding_donor_service.cpp
index c3769f74c89..c15111328ce 100644
--- a/src/mongo/db/s/resharding/resharding_donor_service.cpp
+++ b/src/mongo/db/s/resharding/resharding_donor_service.cpp
@@ -260,6 +260,20 @@ ExecutorFuture<void> ReshardingDonorService::DonorStateMachine::_runUntilBlockin
.onUnrecoverableError([](const Status& status) {})
.until([](const Status& status) { return status.isOK(); })
.on(**executor, abortToken);
+ })
+ .onCompletion([this, executor, abortToken](Status status) {
+ if (abortToken.isCanceled()) {
+ return ExecutorFuture<void>(**executor, status);
+ }
+
+ {
+ // The donor is done with all local transitions until the coordinator makes its
+ // decision.
+ stdx::lock_guard<Latch> lk(_mutex);
+ invariant(_donorCtx.getState() >= DonorStateEnum::kError);
+ ensureFulfilledPromise(lk, _inBlockingWritesOrError);
+ }
+ return ExecutorFuture<void>(**executor, status);
});
}
@@ -527,10 +541,13 @@ ExecutorFuture<void> ReshardingDonorService::DonorStateMachine::
.thenRunOn(**executor)
.then([this]() { _transitionState(DonorStateEnum::kDonatingOplogEntries); })
.onCompletion([=](Status s) {
- if (MONGO_unlikely(
- reshardingDonorFailsAfterTransitionToDonatingOplogEntries.shouldFail())) {
- uasserted(ErrorCodes::InternalError, "Failing for test");
- }
+ reshardingDonorFailsAfterTransitionToDonatingOplogEntries.execute(
+ [&](const BSONObj& data) {
+ auto errmsgElem = data["errmsg"];
+ StringData errmsg =
+ errmsgElem ? errmsgElem.checkAndGetStringData() : "Failing for test"_sd;
+ uasserted(ErrorCodes::InternalError, errmsg);
+ });
});
}
@@ -714,7 +731,7 @@ void ReshardingDonorService::DonorStateMachine::_transitionToDonatingInitialData
void ReshardingDonorService::DonorStateMachine::_transitionToError(Status abortReason) {
auto newDonorCtx = _donorCtx;
newDonorCtx.setState(DonorStateEnum::kError);
- emplaceAbortReasonIfExists(newDonorCtx, abortReason);
+ emplaceTruncatedAbortReasonIfExists(newDonorCtx, abortReason);
_transitionState(std::move(newDonorCtx));
}
diff --git a/src/mongo/db/s/resharding/resharding_donor_service.h b/src/mongo/db/s/resharding/resharding_donor_service.h
index f8ccd15cfc4..8b5c7852acd 100644
--- a/src/mongo/db/s/resharding/resharding_donor_service.h
+++ b/src/mongo/db/s/resharding/resharding_donor_service.h
@@ -97,6 +97,15 @@ public:
SharedSemiFuture<void> awaitFinalOplogEntriesWritten();
+ /**
+ * Returns a Future fulfilled once the donor has locally persisted the last state it reaches on
+ * its own (DonorStateEnum::kBlockingWrites or DonorStateEnum::kError), i.e. before the
+ * coordinator makes its decision to commit or abort.
+ *
+ * NOTE(review): the completion handler that fulfills this promise returns early when the abort
+ * token is canceled, so the Future may remain unfulfilled in that case - confirm against the
+ * abort path.
+ */
+ SharedSemiFuture<void> awaitInBlockingWritesOrError() const {
+ return _inBlockingWritesOrError.getFuture();
+ }
+
static void insertStateDocument(OperationContext* opCtx,
const ReshardingDonorDocument& donorDoc);
@@ -228,6 +237,8 @@ private:
SharedPromise<void> _finalOplogEntriesWritten;
+ SharedPromise<void> _inBlockingWritesOrError;
+
SharedPromise<void> _coordinatorHasDecisionPersisted;
SharedPromise<void> _completionPromise;
diff --git a/src/mongo/db/s/resharding/resharding_donor_service_test.cpp b/src/mongo/db/s/resharding/resharding_donor_service_test.cpp
index 7ea3a8aca98..8a0e4b17c56 100644
--- a/src/mongo/db/s/resharding/resharding_donor_service_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_donor_service_test.cpp
@@ -39,6 +39,7 @@
#include "mongo/db/op_observer_registry.h"
#include "mongo/db/ops/update.h"
#include "mongo/db/ops/update_request.h"
+#include "mongo/db/persistent_task_store.h"
#include "mongo/db/repl/drop_pending_collection_reaper.h"
#include "mongo/db/repl/oplog_entry.h"
#include "mongo/db/repl/primary_only_service_test_fixture.h"
@@ -555,5 +556,66 @@ TEST_F(ReshardingDonorServiceTest, RetainsSourceCollectionOnAbort) {
}
}
+// Verifies that an oversized error raised on the donor (injected through the
+// reshardingDonorFailsAfterTransitionToDonatingOplogEntries failpoint) is persisted on the donor
+// state document as a truncated abort reason with code ReshardCollectionTruncatedError.
+TEST_F(ReshardingDonorServiceTest, TruncatesXLErrorOnDonorDocument) {
+ // TODO (SERVER-57194): enable lock-free reads.
+ bool disableLockFreeReadsOriginalValue = storageGlobalParams.disableLockFreeReads;
+ storageGlobalParams.disableLockFreeReads = true;
+ ON_BLOCK_EXIT(
+ [&] { storageGlobalParams.disableLockFreeReads = disableLockFreeReadsOriginalValue; });
+
+ for (bool isAlsoRecipient : {false, true}) {
+ LOGV2(5568601,
+ "Running case",
+ "test"_attr = _agent.getTestName(),
+ "isAlsoRecipient"_attr = isAlsoRecipient);
+
+ // Error message deliberately larger than kReshardErrorMaxBytes so truncation kicks in.
+ std::string xlErrMsg(6000, 'x');
+ FailPointEnableBlock failpoint("reshardingDonorFailsAfterTransitionToDonatingOplogEntries",
+ BSON("errmsg" << xlErrMsg));
+
+ auto doc = makeStateDocument(isAlsoRecipient);
+ auto opCtx = makeOperationContext();
+ DonorStateMachine::insertStateDocument(opCtx.get(), doc);
+ auto donor = DonorStateMachine::getOrCreate(opCtx.get(), _service, doc.toBSON());
+
+ notifyRecipientsDoneCloning(opCtx.get(), *donor, doc);
+
+ auto localTransitionToErrorFuture = donor->awaitInBlockingWritesOrError();
+ ASSERT_OK(localTransitionToErrorFuture.getNoThrow());
+
+ // The donor still waits for the abort decision from the coordinator despite it having
+ // errored locally. It is therefore safe to check its local state document until
+ // DonorStateMachine::abort() is called.
+ {
+ boost::optional<ReshardingDonorDocument> persistedDonorDocument;
+ PersistentTaskStore<ReshardingDonorDocument> store(
+ NamespaceString::kDonorReshardingOperationsNamespace);
+ store.forEach(
+ opCtx.get(),
+ QUERY(ReshardingDonorDocument::kReshardingUUIDFieldName << doc.getReshardingUUID()),
+ [&](const auto& donorDocument) {
+ persistedDonorDocument.emplace(donorDocument);
+ return false;
+ });
+
+ ASSERT(persistedDonorDocument);
+ auto persistedAbortReasonBSON =
+ persistedDonorDocument->getMutableState().getAbortReason();
+ ASSERT(persistedAbortReasonBSON);
+ // The serialized abortReason will be slightly larger than kReshardErrorMaxBytes: only
+ // the error string is truncated to that size, and the BSON object also carries other
+ // fields such as the error code plus the field names. Allow 200 extra bytes of headroom
+ // for that overhead.
+ int maxReshardErrorBytesCeiling = kReshardErrorMaxBytes + 200;
+ ASSERT_LT(persistedAbortReasonBSON->objsize(), maxReshardErrorBytesCeiling);
+ ASSERT_EQ(persistedAbortReasonBSON->getIntField("code"),
+ ErrorCodes::ReshardCollectionTruncatedError);
+ }
+
+ donor->abort(false);
+ ASSERT_OK(donor->getCompletionFuture().getNoThrow());
+ }
+}
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/s/resharding/resharding_recipient_service.cpp b/src/mongo/db/s/resharding/resharding_recipient_service.cpp
index be313078cf6..707cb36901f 100644
--- a/src/mongo/db/s/resharding/resharding_recipient_service.cpp
+++ b/src/mongo/db/s/resharding/resharding_recipient_service.cpp
@@ -72,6 +72,7 @@ MONGO_FAIL_POINT_DEFINE(removeRecipientDocFailpoint);
MONGO_FAIL_POINT_DEFINE(reshardingPauseRecipientBeforeCloning);
MONGO_FAIL_POINT_DEFINE(reshardingPauseRecipientDuringCloning);
MONGO_FAIL_POINT_DEFINE(reshardingPauseRecipientDuringOplogApplication);
+MONGO_FAIL_POINT_DEFINE(reshardingRecipientFailsAfterTransitionToCloning);
namespace {
@@ -218,6 +219,20 @@ ReshardingRecipientService::RecipientStateMachine::_runUntilStrictConsistencyOrE
.onUnrecoverableError([](const Status& status) {})
.until([](const Status& retryStatus) { return retryStatus.isOK(); })
.on(**executor, abortToken);
+ })
+ .onCompletion([this, executor, abortToken](Status status) {
+ if (abortToken.isCanceled()) {
+ return ExecutorFuture<void>(**executor, status);
+ }
+
+ {
+ // The recipient is done with all local transitions until the coordinator makes its
+ // decision.
+ stdx::lock_guard<Latch> lk(_mutex);
+ invariant(_recipientCtx.getState() >= RecipientStateEnum::kError);
+ ensureFulfilledPromise(lk, _inStrictConsistencyOrError);
+ }
+ return ExecutorFuture<void>(**executor, status);
});
}
@@ -565,6 +580,11 @@ ReshardingRecipientService::RecipientStateMachine::_cloneThenTransitionToApplyin
_ensureDataReplicationStarted(opCtx.get(), executor, abortToken);
}
+ reshardingRecipientFailsAfterTransitionToCloning.execute([&](const BSONObj& data) {
+ auto errmsg = data.getStringField("errmsg");
+ uasserted(ErrorCodes::InternalError, errmsg);
+ });
+
{
auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
reshardingPauseRecipientDuringCloning.pauseWhileSet(opCtx.get());
@@ -726,7 +746,7 @@ void ReshardingRecipientService::RecipientStateMachine::_transitionToStrictConsi
void ReshardingRecipientService::RecipientStateMachine::_transitionToError(Status abortReason) {
auto newRecipientCtx = _recipientCtx;
newRecipientCtx.setState(RecipientStateEnum::kError);
- emplaceAbortReasonIfExists(newRecipientCtx, abortReason);
+ emplaceTruncatedAbortReasonIfExists(newRecipientCtx, abortReason);
_transitionState(std::move(newRecipientCtx), boost::none, boost::none);
}
diff --git a/src/mongo/db/s/resharding/resharding_recipient_service.h b/src/mongo/db/s/resharding/resharding_recipient_service.h
index bae2578ba97..71a9aab7e63 100644
--- a/src/mongo/db/s/resharding/resharding_recipient_service.h
+++ b/src/mongo/db/s/resharding/resharding_recipient_service.h
@@ -128,6 +128,15 @@ public:
void interrupt(Status status) override;
/**
+ * Returns a Future fulfilled once the recipient has locally persisted the last state it reaches
+ * on its own (RecipientStateEnum::kStrictConsistency or RecipientStateEnum::kError), i.e. before
+ * the coordinator makes its decision to commit or abort.
+ *
+ * NOTE(review): the completion handler that fulfills this promise returns early when the abort
+ * token is canceled, so the Future may remain unfulfilled in that case - confirm against the
+ * abort path.
+ */
+ SharedSemiFuture<void> awaitInStrictConsistencyOrError() const {
+ return _inStrictConsistencyOrError.getFuture();
+ }
+
+ /**
* Returns a Future that will be resolved when all work associated with this Instance is done
* making forward progress.
*/
@@ -273,6 +282,8 @@ private:
// ascending order, such that the first promise below will be the first promise fulfilled.
SharedPromise<CloneDetails> _allDonorsPreparedToDonate;
+ SharedPromise<void> _inStrictConsistencyOrError;
+
SharedPromise<void> _coordinatorHasDecisionPersisted;
SharedPromise<void> _completionPromise;
diff --git a/src/mongo/db/s/resharding/resharding_recipient_service_test.cpp b/src/mongo/db/s/resharding/resharding_recipient_service_test.cpp
index 2d73855743d..8b659257a30 100644
--- a/src/mongo/db/s/resharding/resharding_recipient_service_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_recipient_service_test.cpp
@@ -34,6 +34,7 @@
#include "mongo/db/dbdirectclient.h"
#include "mongo/db/op_observer_noop.h"
#include "mongo/db/op_observer_registry.h"
+#include "mongo/db/persistent_task_store.h"
#include "mongo/db/repl/drop_pending_collection_reaper.h"
#include "mongo/db/repl/oplog_entry.h"
#include "mongo/db/repl/primary_only_service_test_fixture.h"
@@ -45,6 +46,7 @@
#include "mongo/db/s/resharding/resharding_recipient_service_external_state.h"
#include "mongo/db/s/resharding/resharding_service_test_helpers.h"
#include "mongo/logv2/log.h"
+#include "mongo/util/fail_point.h"
namespace mongo {
namespace {
@@ -534,5 +536,66 @@ TEST_F(ReshardingRecipientServiceTest, RenamesTemporaryReshardingCollectionWhenD
}
}
+// Verifies that an oversized error raised on the recipient (injected through the
+// reshardingRecipientFailsAfterTransitionToCloning failpoint) is persisted on the recipient state
+// document as a truncated abort reason with code ReshardCollectionTruncatedError.
+TEST_F(ReshardingRecipientServiceTest, TruncatesXLErrorOnRecipientDocument) {
+ // TODO (SERVER-57194): enable lock-free reads.
+ bool disableLockFreeReadsOriginalValue = storageGlobalParams.disableLockFreeReads;
+ storageGlobalParams.disableLockFreeReads = true;
+ ON_BLOCK_EXIT(
+ [&] { storageGlobalParams.disableLockFreeReads = disableLockFreeReadsOriginalValue; });
+
+ for (bool isAlsoDonor : {false, true}) {
+ LOGV2(5568600,
+ "Running case",
+ "test"_attr = _agent.getTestName(),
+ "isAlsoDonor"_attr = isAlsoDonor);
+
+ // Error message deliberately larger than kReshardErrorMaxBytes so truncation kicks in.
+ std::string xlErrMsg(6000, 'x');
+ FailPointEnableBlock failpoint("reshardingRecipientFailsAfterTransitionToCloning",
+ BSON("errmsg" << xlErrMsg));
+
+ auto doc = makeStateDocument(isAlsoDonor);
+ auto opCtx = makeOperationContext();
+ RecipientStateMachine::insertStateDocument(opCtx.get(), doc);
+ auto recipient = RecipientStateMachine::getOrCreate(opCtx.get(), _service, doc.toBSON());
+
+ notifyToStartCloning(opCtx.get(), *recipient, doc);
+
+ auto localTransitionToErrorFuture = recipient->awaitInStrictConsistencyOrError();
+ ASSERT_OK(localTransitionToErrorFuture.getNoThrow());
+
+ // The recipient still waits for the abort decision from the coordinator despite it having
+ // errored locally. It is therefore safe to check its local state document until
+ // RecipientStateMachine::abort() is called.
+ {
+ boost::optional<ReshardingRecipientDocument> persistedRecipientDocument;
+ PersistentTaskStore<ReshardingRecipientDocument> store(
+ NamespaceString::kRecipientReshardingOperationsNamespace);
+ store.forEach(opCtx.get(),
+ QUERY(ReshardingRecipientDocument::kReshardingUUIDFieldName
+ << doc.getReshardingUUID()),
+ [&](const auto& recipientDocument) {
+ persistedRecipientDocument.emplace(recipientDocument);
+ return false;
+ });
+
+ ASSERT(persistedRecipientDocument);
+ auto persistedAbortReasonBSON =
+ persistedRecipientDocument->getMutableState().getAbortReason();
+ ASSERT(persistedAbortReasonBSON);
+ // The serialized abortReason will be slightly larger than kReshardErrorMaxBytes: only
+ // the error string is truncated to that size, and the BSON object also carries other
+ // fields such as the error code plus the field names. Allow 200 extra bytes of headroom
+ // for that overhead.
+ int maxReshardErrorBytesCeiling = kReshardErrorMaxBytes + 200;
+ ASSERT_LT(persistedAbortReasonBSON->objsize(), maxReshardErrorBytesCeiling);
+ ASSERT_EQ(persistedAbortReasonBSON->getIntField("code"),
+ ErrorCodes::ReshardCollectionTruncatedError);
+ }
+
+ recipient->abort(false);
+ ASSERT_OK(recipient->getCompletionFuture().getNoThrow());
+ }
+}
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/s/resharding_util.cpp b/src/mongo/db/s/resharding_util.cpp
index c53b2cffcaa..ccd4787fdcd 100644
--- a/src/mongo/db/s/resharding_util.cpp
+++ b/src/mongo/db/s/resharding_util.cpp
@@ -88,6 +88,33 @@ bool documentBelongsToMe(OperationContext* opCtx,
}
} // namespace
+// Serializes 'originalError' to BSON. If the serialized object is larger than
+// kReshardErrorMaxBytes (and the status is not already a ReshardCollectionTruncatedError), returns
+// instead the serialization of a new ErrorCodes::ReshardCollectionTruncatedError status whose
+// reason is the original status's string form, cut down via str::UTF8SafeTruncation with a
+// kReshardErrorMaxBytes limit.
+BSONObj serializeAndTruncateReshardingErrorIfNeeded(Status originalError) {
+ BSONObjBuilder originalBob;
+ originalError.serializeErrorToBSON(&originalBob);
+ auto originalObj = originalBob.obj();
+
+ if (originalObj.objsize() <= kReshardErrorMaxBytes ||
+ originalError.code() == ErrorCodes::ReshardCollectionTruncatedError) {
+ // The provided originalError either meets the size constraints or has already been
+ // truncated (and is just slightly larger than kReshardErrorMaxBytes to avoid complicating
+ // the truncation math).
+ return originalObj;
+ }
+
+ // ReshardCollectionAborted has special internal handling. It should always have a short, fixed
+ // error message so that it never exceeds the size limit and never needs truncation or error
+ // code substitution.
+ invariant(originalError.code() != ErrorCodes::ReshardCollectionAborted);
+
+ // Truncate the string form of the whole status (originalError.toString()), then wrap the
+ // result as the reason of a ReshardCollectionTruncatedError status.
+ auto originalErrorStr = originalError.toString();
+ auto truncatedErrorStr =
+ str::UTF8SafeTruncation(StringData(originalErrorStr), kReshardErrorMaxBytes);
+ Status truncatedError{ErrorCodes::ReshardCollectionTruncatedError, truncatedErrorStr};
+ BSONObjBuilder truncatedBob;
+ truncatedError.serializeErrorToBSON(&truncatedBob);
+ return truncatedBob.obj();
+}
+
DonorShardEntry makeDonorShard(ShardId shardId,
DonorStateEnum donorState,
boost::optional<Timestamp> minFetchTimestamp,
@@ -95,7 +122,7 @@ DonorShardEntry makeDonorShard(ShardId shardId,
DonorShardContext donorCtx;
donorCtx.setState(donorState);
emplaceMinFetchTimestampIfExists(donorCtx, minFetchTimestamp);
- emplaceAbortReasonIfExists(donorCtx, abortReason);
+ emplaceTruncatedAbortReasonIfExists(donorCtx, abortReason);
return DonorShardEntry{std::move(shardId), std::move(donorCtx)};
}
@@ -105,7 +132,7 @@ RecipientShardEntry makeRecipientShard(ShardId shardId,
boost::optional<Status> abortReason) {
RecipientShardContext recipientCtx;
recipientCtx.setState(recipientState);
- emplaceAbortReasonIfExists(recipientCtx, abortReason);
+ emplaceTruncatedAbortReasonIfExists(recipientCtx, abortReason);
return RecipientShardEntry{std::move(shardId), std::move(recipientCtx)};
}
diff --git a/src/mongo/db/s/resharding_util.h b/src/mongo/db/s/resharding_util.h
index 0a4f792db5f..c01888ddee6 100644
--- a/src/mongo/db/s/resharding_util.h
+++ b/src/mongo/db/s/resharding_util.h
@@ -47,10 +47,12 @@
#include "mongo/s/resharding/common_types_gen.h"
#include "mongo/s/shard_id.h"
#include "mongo/s/write_ops/batched_command_request.h"
+#include "mongo/util/str.h"
namespace mongo {
constexpr auto kReshardFinalOpLogType = "reshardFinalOp"_sd;
+// Size threshold, in bytes, above which a serialized resharding abort reason gets truncated (see
+// serializeAndTruncateReshardingErrorIfNeeded()).
+static const auto kReshardErrorMaxBytes = 2000;
/**
* Emplaces the 'fetchTimestamp' onto the ClassWithFetchTimestamp if the timestamp has been
@@ -118,11 +120,21 @@ void emplaceMinFetchTimestampIfExists(ClassWithMinFetchTimestamp& c,
}
/**
+ * Returns a serialized version of the originalError status. If the serialized status exceeds
+ * kReshardErrorMaxBytes, truncates its string form and returns it in the errmsg field of a new
+ * status with code ErrorCodes::ReshardCollectionTruncatedError.
+ */
+BSONObj serializeAndTruncateReshardingErrorIfNeeded(Status originalError);
+
+/**
* Emplaces the 'abortReason' onto the ClassWithAbortReason if the reason has been emplaced inside
- * the boost::optional.
+ * the boost::optional. If the 'abortReason' is too large, emplaces a status with
+ * ErrorCodes::ReshardCollectionTruncatedError and a truncated version of the 'abortReason' for the
+ * errmsg.
*/
template <class ClassWithAbortReason>
-void emplaceAbortReasonIfExists(ClassWithAbortReason& c, boost::optional<Status> abortReason) {
+void emplaceTruncatedAbortReasonIfExists(ClassWithAbortReason& c,
+ boost::optional<Status> abortReason) {
if (!abortReason) {
return;
}
@@ -134,10 +146,9 @@ void emplaceAbortReasonIfExists(ClassWithAbortReason& c, boost::optional<Status>
return;
}
- BSONObjBuilder bob;
- abortReason.get().serializeErrorToBSON(&bob);
+ auto truncatedAbortReasonObj = serializeAndTruncateReshardingErrorIfNeeded(abortReason.get());
AbortReason abortReasonStruct;
- abortReasonStruct.setAbortReason(bob.obj());
+ abortReasonStruct.setAbortReason(truncatedAbortReasonObj);
c.setAbortReasonStruct(std::move(abortReasonStruct));
}