diff options
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/base/error_codes.yml | 5 | ||||
-rw-r--r-- | src/mongo/db/s/sharding_ddl_coordinator.cpp | 26 | ||||
-rw-r--r-- | src/mongo/db/s/sharding_ddl_coordinator.h | 38 | ||||
-rw-r--r-- | src/mongo/db/s/sharding_ddl_coordinator.idl | 13 | ||||
-rw-r--r-- | src/mongo/db/s/sharding_ddl_util.cpp | 40 | ||||
-rw-r--r-- | src/mongo/db/s/sharding_ddl_util.h | 8 | ||||
-rw-r--r-- | src/mongo/db/s/sharding_ddl_util_test.cpp | 92 |
7 files changed, 218 insertions, 4 deletions
diff --git a/src/mongo/base/error_codes.yml b/src/mongo/base/error_codes.yml index d40868eee0a..15baffdae6a 100644 --- a/src/mongo/base/error_codes.yml +++ b/src/mongo/base/error_codes.yml @@ -506,7 +506,7 @@ error_codes: - {code: 386, name: DuplicateKeyId} - {code: 387, name: EncounteredFLEPayloadWhileRedacting} - + - {code: 388, name: TransactionTooLargeForCache} - {code: 389, name: LibmongocryptError} @@ -516,6 +516,9 @@ error_codes: - {code: 393, name: InvalidTenantId} - {code: 394, name: MovePrimaryRecipientDocNotFound, categories: [InternalOnly]} + + - {code: 395, name: TruncatedSerialization} + # Error codes 4000-8999 are reserved. # Non-sequential error codes for compatibility only) diff --git a/src/mongo/db/s/sharding_ddl_coordinator.cpp b/src/mongo/db/s/sharding_ddl_coordinator.cpp index 62b1e4eef44..5306b9c1415 100644 --- a/src/mongo/db/s/sharding_ddl_coordinator.cpp +++ b/src/mongo/db/s/sharding_ddl_coordinator.cpp @@ -226,6 +226,17 @@ ExecutorFuture<void> ShardingDDLCoordinator::_acquireLockAsync( .on(**executor, token); } +ExecutorFuture<void> ShardingDDLCoordinator::_cleanupOnAbort( + std::shared_ptr<executor::ScopedTaskExecutor> executor, + const CancellationToken& token, + const Status& status) noexcept { + return ExecutorFuture<void>(**executor); +} + +boost::optional<Status> ShardingDDLCoordinator::getAbortReason() const { + return boost::none; +} + void ShardingDDLCoordinator::interrupt(Status status) { LOGV2_DEBUG(5390535, 1, @@ -348,17 +359,25 @@ SemiFuture<void> ShardingDDLCoordinator::run(std::shared_ptr<executor::ScopedTas return status; }) .then([this, executor, token, anchor = shared_from_this()] { - return AsyncTry([this, executor, token] { return _runImpl(executor, token); }) + return AsyncTry([this, executor, token] { + if (const auto& status = getAbortReason()) { + return _cleanupOnAbort(executor, token, *status); + } + + return _runImpl(executor, token); + }) .until([this, token](Status status) { // Retry until either: // - The 
coordinator succeed // - The coordinator failed with non-retryable error determined by the // coordinator, or an already known retryable error + // - Cleanup is not planned // // If the token is not cancelled we retry because it could have been generated // by a remote node. if (!status.isOK() && !_completeOnError && - (_mustAlwaysMakeProgress() || _isRetriableErrorForDDLCoordinator(status)) && + (getAbortReason() || _mustAlwaysMakeProgress() || + _isRetriableErrorForDDLCoordinator(status)) && !token.isCanceled()) { LOGV2_INFO(5656000, "Re-executing sharding DDL coordinator", @@ -383,7 +402,8 @@ SemiFuture<void> ShardingDDLCoordinator::run(std::shared_ptr<executor::ScopedTas status.isA<ErrorCategory::ShutdownError>()) || token.isCanceled() || _completeOnError); - auto completionStatus = status; + auto completionStatus = + !status.isOK() ? status : getAbortReason().get_value_or(Status::OK()); bool isSteppingDown = token.isCanceled(); diff --git a/src/mongo/db/s/sharding_ddl_coordinator.h b/src/mongo/db/s/sharding_ddl_coordinator.h index 6cbc4cbd099..7b53a15e6f7 100644 --- a/src/mongo/db/s/sharding_ddl_coordinator.h +++ b/src/mongo/db/s/sharding_ddl_coordinator.h @@ -161,6 +161,11 @@ private: virtual ExecutorFuture<void> _runImpl(std::shared_ptr<executor::ScopedTaskExecutor> executor, const CancellationToken& token) noexcept = 0; + virtual ExecutorFuture<void> _cleanupOnAbort( + std::shared_ptr<executor::ScopedTaskExecutor> executor, + const CancellationToken& token, + const Status& status) noexcept; + void interrupt(Status status) override final; bool _removeDocument(OperationContext* opCtx); @@ -174,6 +179,8 @@ private: ExecutorFuture<void> _translateTimeseriesNss( std::shared_ptr<executor::ScopedTaskExecutor> executor, const CancellationToken& token); + virtual boost::optional<Status> getAbortReason() const; + Mutex _mutex = MONGO_MAKE_LATCH("ShardingDDLCoordinator::_mutex"); SharedPromise<void> _constructionCompletionPromise; SharedPromise<void> 
_completionPromise; @@ -401,6 +408,37 @@ protected: osi.setTxnNumber(optSession->getTxnNumber()); return osi; } + + virtual boost::optional<Status> getAbortReason() const override { + const auto& status = _doc.getAbortReason(); + invariant(!status || !status->isOK(), "when persisted, status must be an error"); + return status; + } + + /** + * Persists the abort reason and throws it as an exception. This causes the coordinator to fail, + * and triggers the cleanup future chain since there is a persisted reason. + */ + void triggerCleanup(OperationContext* opCtx, const Status& status) { + LOGV2_INFO(7418502, + "Coordinator failed, persisting abort reason", + "coordinatorId"_attr = _doc.getId(), + "phase"_attr = serializePhase(_doc.getPhase()), + "reason"_attr = redact(status)); + + auto newDoc = [&] { + stdx::lock_guard lk{_docMutex}; + return _doc; + }(); + + auto coordinatorMetadata = newDoc.getShardingDDLCoordinatorMetadata(); + coordinatorMetadata.setAbortReason(status); + newDoc.setShardingDDLCoordinatorMetadata(std::move(coordinatorMetadata)); + + _updateStateDocument(opCtx, std::move(newDoc)); + + uassertStatusOK(status); + } }; #undef MONGO_LOGV2_DEFAULT_COMPONENT diff --git a/src/mongo/db/s/sharding_ddl_coordinator.idl b/src/mongo/db/s/sharding_ddl_coordinator.idl index 26d5702ec1e..e8336c5c0dc 100644 --- a/src/mongo/db/s/sharding_ddl_coordinator.idl +++ b/src/mongo/db/s/sharding_ddl_coordinator.idl @@ -32,6 +32,7 @@ global: cpp_namespace: "mongo" cpp_includes: - "mongo/db/s/forwardable_operation_metadata.h" + - "mongo/db/s/sharding_ddl_util.h" - "mongo/s/database_version.h" imports: @@ -75,6 +76,7 @@ types: bson_serialization_type: object serializer: "mongo::ForwardableOperationMetadata::toBSON" deserializer: "mongo::ForwardableOperationMetadata" + DatabaseVersion: description: "" cpp_type: DatabaseVersion @@ -82,6 +84,13 @@ types: serializer: "mongo::DatabaseVersion::toBSON" deserializer: "mongo::DatabaseVersion::toBSON" + ErrorStatus:
bson_serialization_type: any + description: "Status representing an error state" + cpp_type: "::mongo::Status" + serializer: "::mongo::sharding_ddl_util_serializeErrorStatusToBSON" + deserializer: "::mongo::sharding_ddl_util_deserializeErrorStatusFromBSON" + structs: ShardingDDLCoordinatorId: description: "Identifier for a specific sharding DDL Coordinator." @@ -129,3 +138,7 @@ structs: description: "In case of sharded timeseries this variable will hold the underlying bucket namespace" type: namespacestring optional: true + abortReason: + description: "Error triggering the cleanup pipeline" + type: ErrorStatus + optional: true diff --git a/src/mongo/db/s/sharding_ddl_util.cpp b/src/mongo/db/s/sharding_ddl_util.cpp index deca970d22b..6bb773a1b7c 100644 --- a/src/mongo/db/s/sharding_ddl_util.cpp +++ b/src/mongo/db/s/sharding_ddl_util.cpp @@ -30,6 +30,7 @@ #include "mongo/db/s/sharding_ddl_util.h" +#include "mongo/bson/util/bson_extract.h" #include "mongo/db/catalog/collection_catalog.h" #include "mongo/db/cluster_transaction_api.h" #include "mongo/db/commands/feature_compatibility_version.h" @@ -66,6 +67,44 @@ namespace mongo { + +static const size_t kSerializedErrorStatusMaxSize = 1024 * 2; + +void sharding_ddl_util_serializeErrorStatusToBSON(const Status& status, + StringData fieldName, + BSONObjBuilder* bsonBuilder) { + uassert(7418500, "Status must be an error", !status.isOK()); + + BSONObjBuilder tmpBuilder; + status.serialize(&tmpBuilder); + + if (status != ErrorCodes::TruncatedSerialization && + (size_t)tmpBuilder.asTempObj().objsize() > kSerializedErrorStatusMaxSize) { + const auto statusStr = status.toString(); + const auto truncatedStatusStr = + str::UTF8SafeTruncation(statusStr, kSerializedErrorStatusMaxSize); + const Status truncatedStatus{ErrorCodes::TruncatedSerialization, truncatedStatusStr}; + + tmpBuilder.resetToEmpty(); + truncatedStatus.serializeErrorToBSON(&tmpBuilder); + } + + bsonBuilder->append(fieldName, tmpBuilder.obj()); +} + +Status 
sharding_ddl_util_deserializeErrorStatusFromBSON(const BSONElement& bsonElem) { + const auto& bsonObj = bsonElem.Obj(); + + long long code; + uassertStatusOK(bsonExtractIntegerField(bsonObj, "code", &code)); + uassert(7418501, "Status must be an error", code != ErrorCodes::OK); + + std::string errmsg; + uassertStatusOK(bsonExtractStringField(bsonObj, "errmsg", &errmsg)); + + return {ErrorCodes::Error(code), errmsg, bsonObj}; +} + namespace sharding_ddl_util { namespace { @@ -872,5 +911,6 @@ BSONObj getCriticalSectionReasonForRename(const NamespaceString& from, const Nam << "rename" << "from" << from.toString() << "to" << to.toString()); } + } // namespace sharding_ddl_util } // namespace mongo diff --git a/src/mongo/db/s/sharding_ddl_util.h b/src/mongo/db/s/sharding_ddl_util.h index cc800b04632..988d2bb571a 100644 --- a/src/mongo/db/s/sharding_ddl_util.h +++ b/src/mongo/db/s/sharding_ddl_util.h @@ -40,6 +40,14 @@ #include "mongo/s/request_types/sharded_ddl_commands_gen.h" namespace mongo { + +// TODO (SERVER-74481): Define these functions in the nested `sharding_ddl_util` namespace when the +// IDL compiler will support the use case. 
+void sharding_ddl_util_serializeErrorStatusToBSON(const Status& status, + StringData fieldName, + BSONObjBuilder* bsonBuilder); +Status sharding_ddl_util_deserializeErrorStatusFromBSON(const BSONElement& bsonElem); + namespace sharding_ddl_util { /** diff --git a/src/mongo/db/s/sharding_ddl_util_test.cpp b/src/mongo/db/s/sharding_ddl_util_test.cpp index 7b373dfb284..ee4c94640e6 100644 --- a/src/mongo/db/s/sharding_ddl_util_test.cpp +++ b/src/mongo/db/s/sharding_ddl_util_test.cpp @@ -103,6 +103,98 @@ void findN(DBClientBase& client, } } +TEST_F(ShardingDDLUtilTest, SerializeDeserializeErrorStatusWithoutExtraInfo) { + const Status sample{ErrorCodes::ForTestingOptionalErrorExtraInfo, "Dummy reason"}; + + BSONObjBuilder bsonBuilder; + sharding_ddl_util_serializeErrorStatusToBSON(sample, "status", &bsonBuilder); + const auto serialized = bsonBuilder.done(); + + const auto deserialized = + sharding_ddl_util_deserializeErrorStatusFromBSON(serialized.firstElement()); + + ASSERT_EQ(sample.code(), deserialized.code()); + ASSERT_EQ(sample.reason(), deserialized.reason()); + ASSERT(!deserialized.extraInfo()); +} + +TEST_F(ShardingDDLUtilTest, SerializeDeserializeErrorStatusWithExtraInfo) { + OptionalErrorExtraInfoExample::EnableParserForTest whenInScope; + + const Status sample{ + ErrorCodes::ForTestingOptionalErrorExtraInfo, "Dummy reason", fromjson("{data: 123}")}; + + BSONObjBuilder bsonBuilder; + sharding_ddl_util_serializeErrorStatusToBSON(sample, "status", &bsonBuilder); + const auto serialized = bsonBuilder.done(); + + const auto deserialized = + sharding_ddl_util_deserializeErrorStatusFromBSON(serialized.firstElement()); + + ASSERT_EQ(sample.code(), deserialized.code()); + ASSERT_EQ(sample.reason(), deserialized.reason()); + ASSERT(deserialized.extraInfo()); + ASSERT(deserialized.extraInfo<OptionalErrorExtraInfoExample>()); + ASSERT_EQ(deserialized.extraInfo<OptionalErrorExtraInfoExample>()->data, 123); +} + +TEST_F(ShardingDDLUtilTest, 
SerializeDeserializeErrorStatusInvalid) { + BSONObjBuilder bsonBuilder; + ASSERT_THROWS_CODE( + sharding_ddl_util_serializeErrorStatusToBSON(Status::OK(), "status", &bsonBuilder), + DBException, + 7418500); + + const auto okStatusBSON = + BSON("status" << BSON("code" << ErrorCodes::OK << "codeName" + << ErrorCodes::errorString(ErrorCodes::OK))); + ASSERT_THROWS_CODE( + sharding_ddl_util_deserializeErrorStatusFromBSON(okStatusBSON.firstElement()), + DBException, + 7418501); +} + +TEST_F(ShardingDDLUtilTest, SerializeErrorStatusTooBig) { + const std::string longReason(1024 * 3, 'x'); + const Status sample{ErrorCodes::ForTestingOptionalErrorExtraInfo, longReason}; + + BSONObjBuilder bsonBuilder; + sharding_ddl_util_serializeErrorStatusToBSON(sample, "status", &bsonBuilder); + const auto serialized = bsonBuilder.done(); + + const auto deserialized = + sharding_ddl_util_deserializeErrorStatusFromBSON(serialized.firstElement()); + + ASSERT_EQ(ErrorCodes::TruncatedSerialization, deserialized.code()); + ASSERT_EQ( + "ForTestingOptionalErrorExtraInfo: " + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + 
"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + deserialized.reason()); + ASSERT(!deserialized.extraInfo()); +} + // Test that config.collection document and config.chunks documents are properly updated from source // to destination collection metadata TEST_F(ShardingDDLUtilTest, ShardedRenameMetadata) { |