summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJack Mulrow <jack.mulrow@mongodb.com>2021-06-24 17:12:14 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-08-04 14:41:50 +0000
commita3d735522cd7af506ebe0d947c8e903b1c041b3e (patch)
treee93d6b9cc371f7451d47f8096e9e6f122f3da1e9 /src
parentdd6bc197f73bf0f40234db2be02efc76560693df (diff)
downloadmongo-a3d735522cd7af506ebe0d947c8e903b1c041b3e.tar.gz
SERVER-58068 Multi updates interrupted by tenant migration should wait for migration to complete
Diffstat (limited to 'src')
-rw-r--r--src/mongo/base/error_codes.yml5
-rw-r--r--src/mongo/db/catalog/multi_index_block.cpp4
-rw-r--r--src/mongo/db/commands/write_commands.cpp30
-rw-r--r--src/mongo/db/ops/write_ops_exec.cpp26
-rw-r--r--src/mongo/db/repl/tenant_migration_access_blocker_util.cpp33
-rw-r--r--src/mongo/db/repl/tenant_migration_access_blocker_util.h5
-rw-r--r--src/mongo/db/repl/tenant_migration_conflict_info.cpp7
-rw-r--r--src/mongo/db/repl/tenant_migration_conflict_info.h32
-rw-r--r--src/mongo/db/repl/tenant_migration_donor_access_blocker.cpp5
-rw-r--r--src/mongo/db/service_entry_point_common.cpp12
10 files changed, 106 insertions, 53 deletions
diff --git a/src/mongo/base/error_codes.yml b/src/mongo/base/error_codes.yml
index 8db9769c995..17d50a039f3 100644
--- a/src/mongo/base/error_codes.yml
+++ b/src/mongo/base/error_codes.yml
@@ -30,6 +30,7 @@ error_categories:
# Codes for internal use only that must never be returned in a network response.
- InternalOnly
- TenantMigrationError
+ - TenantMigrationConflictError
- CursorInvalidatedError
error_codes:
@@ -384,7 +385,7 @@ error_codes:
- {code: 318,name: ForTestingOptionalErrorExtraInfo,extra: OptionalErrorExtraInfoExample,extraIsOptional: True}
- {code: 319,name: MovePrimaryInProgress}
- - {code: 320, name: TenantMigrationConflict,extra: TenantMigrationConflictInfo,categories: [TenantMigrationError]}
+ - {code: 320, name: TenantMigrationConflict,extra: TenantMigrationConflictInfo,categories: [TenantMigrationError, TenantMigrationConflictError]}
- {code: 321, name: TenantMigrationCommitted, categories: [TenantMigrationError]}
- {code: 322, name: APIVersionError, categories: [VersionedAPIError]}
@@ -445,6 +446,8 @@ error_codes:
- {code: 352, name: UnsupportedOpQueryCommand}
+ - {code: 353, name: NonRetryableTenantMigrationConflict, extra: NonRetryableTenantMigrationConflictInfo, categories: [TenantMigrationError, TenantMigrationConflictError]}
+
# Error codes 4000-8999 are reserved.
# Non-sequential error codes for compatibility only)
diff --git a/src/mongo/db/catalog/multi_index_block.cpp b/src/mongo/db/catalog/multi_index_block.cpp
index 8b927e844b2..8ed42bb3231 100644
--- a/src/mongo/db/catalog/multi_index_block.cpp
+++ b/src/mongo/db/catalog/multi_index_block.cpp
@@ -334,8 +334,8 @@ StatusWith<std::vector<BSONObj>> MultiIndexBlock::init(
} catch (const WriteConflictException&) {
// Avoid converting WCE to Status.
throw;
- } catch (const TenantMigrationConflictException&) {
- // Avoid converting TenantMigrationConflictException to Status.
+ } catch (const ExceptionForCat<ErrorCategory::TenantMigrationConflictError>&) {
+ // Avoid converting TenantMigrationConflict errors to Status.
throw;
} catch (const TenantMigrationCommittedException&) {
// Avoid converting TenantMigrationCommittedException to Status.
diff --git a/src/mongo/db/commands/write_commands.cpp b/src/mongo/db/commands/write_commands.cpp
index 267a6bf1aa9..a914f54cca8 100644
--- a/src/mongo/db/commands/write_commands.cpp
+++ b/src/mongo/db/commands/write_commands.cpp
@@ -57,6 +57,7 @@
#include "mongo/db/repl/repl_client_info.h"
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/repl/tenant_migration_access_blocker_registry.h"
+#include "mongo/db/repl/tenant_migration_access_blocker_util.h"
#include "mongo/db/repl/tenant_migration_conflict_info.h"
#include "mongo/db/repl/tenant_migration_decoration.h"
#include "mongo/db/retryable_writes_stats.h"
@@ -312,25 +313,24 @@ boost::optional<BSONObj> generateError(OperationContext* opCtx,
status.extraInfo<doc_validation_error::DocumentValidationFailureInfo>()) {
error.append("code", static_cast<int>(ErrorCodes::DocumentValidationFailure));
error.append("errInfo", docValidationError->getDetails());
- } else if (ErrorCodes::isTenantMigrationError(status.code())) {
- if (ErrorCodes::TenantMigrationConflict == status.code()) {
- auto migrationConflictInfo = status.extraInfo<TenantMigrationConflictInfo>();
+ } else if (status.code() == ErrorCodes::TenantMigrationConflict) {
+ hangWriteBeforeWaitingForMigrationDecision.pauseWhileSet(opCtx);
- hangWriteBeforeWaitingForMigrationDecision.pauseWhileSet(opCtx);
+ auto migrationStatus =
+ tenant_migration_access_blocker::handleTenantMigrationConflict(opCtx, status);
- auto mtab = migrationConflictInfo->getTenantMigrationAccessBlocker();
+ // Interruption errors encountered during batch execution fail the entire batch, so throw on
+ // such errors here for consistency.
+ if (ErrorCodes::isInterruption(migrationStatus)) {
+ uassertStatusOK(migrationStatus);
+ }
- auto migrationStatus = mtab->waitUntilCommittedOrAborted(opCtx);
- mtab->recordTenantMigrationError(migrationStatus);
- error.append("code", static_cast<int>(migrationStatus.code()));
+ error.append("code", migrationStatus.code());
- // We want to append an empty errmsg for the errors after the first one, so let the
- // code below that appends errmsg do that.
- if (status.reason() != "") {
- error.append("errmsg", errorMessage(migrationStatus.reason()));
- }
- } else {
- error.append("code", int(status.code()));
+ // We want to append an empty errmsg for the errors after the first one, so let the
+ // code below that appends errmsg do that.
+ if (status.reason() != "") {
+ error.append("errmsg", errorMessage(migrationStatus.reason()));
}
} else {
error.append("code", int(status.code()));
diff --git a/src/mongo/db/ops/write_ops_exec.cpp b/src/mongo/db/ops/write_ops_exec.cpp
index c08c854a2f2..19b9039f79c 100644
--- a/src/mongo/db/ops/write_ops_exec.cpp
+++ b/src/mongo/db/ops/write_ops_exec.cpp
@@ -66,6 +66,7 @@
#include "mongo/db/record_id_helpers.h"
#include "mongo/db/repl/repl_client_info.h"
#include "mongo/db/repl/replication_coordinator.h"
+#include "mongo/db/repl/tenant_migration_conflict_info.h"
#include "mongo/db/repl/tenant_migration_decoration.h"
#include "mongo/db/retryable_writes_stats.h"
#include "mongo/db/s/collection_sharding_state.h"
@@ -293,17 +294,24 @@ bool handleError(OperationContext* opCtx,
}
if (ErrorCodes::isTenantMigrationError(ex)) {
+ // Multiple not-idempotent updates are not safe to retry at the cloud level. We treat these
+ // the same as an interruption due to a repl state change and fail the whole batch.
if (isMultiUpdate) {
- BSONObjBuilder builder;
- ex.serialize(&builder);
- // Multiple not-idempotent updates are not safe to retry at the cloud level. To indicate
- // this, we replace the original error.
- out->results.emplace_back(
- Status(ErrorCodes::Interrupted,
- str::stream() << "Multi update was interrupted by error: " << ex.reason(),
- builder.obj()));
- return false;
+ if (ex.code() != ErrorCodes::TenantMigrationConflict) {
+ uassertStatusOK(kNonRetryableTenantMigrationStatus);
+ }
+
+ // If the migration is active, we throw a different code that will be caught higher up
+ // and replaced with a non-retryable code after the migration finishes to avoid wasted
+ // retries.
+ auto migrationConflictInfo = ex.toStatus().extraInfo<TenantMigrationConflictInfo>();
+ uassertStatusOK(
+ Status(NonRetryableTenantMigrationConflictInfo(
+ migrationConflictInfo->getTenantId(),
+ migrationConflictInfo->getTenantMigrationAccessBlocker()),
+ "Multi update must block until this tenant migration commits or aborts"));
}
+
// If an op fails due to a TenantMigrationError then subsequent ops will also fail due to a
// migration blocking, committing, or aborting.
out->results.emplace_back(ex.toStatus());
diff --git a/src/mongo/db/repl/tenant_migration_access_blocker_util.cpp b/src/mongo/db/repl/tenant_migration_access_blocker_util.cpp
index e1e4250b2b7..b3d2f881f95 100644
--- a/src/mongo/db/repl/tenant_migration_access_blocker_util.cpp
+++ b/src/mongo/db/repl/tenant_migration_access_blocker_util.cpp
@@ -223,12 +223,12 @@ SemiFuture<void> checkIfCanReadOrBlock(OperationContext* opCtx, const OpMsgReque
}
if (isCanceledDueToTimeout) {
- return Status(
- timeoutError,
- "Blocked read timed out waiting for tenant migration to commit or abort");
+ return Status(timeoutError,
+ "Blocked read timed out waiting for an internal data migration "
+ "to commit or abort");
}
- return status.withContext("Canceled read blocked by tenant migration");
+ return status.withContext("Canceled read blocked by internal data migration");
})
.semi(); // To require continuation in the user executor.
}
@@ -384,14 +384,33 @@ void recoverTenantMigrationAccessBlockers(OperationContext* opCtx) {
});
}
-void handleTenantMigrationConflict(OperationContext* opCtx, Status status) {
- auto migrationConflictInfo = status.extraInfo<TenantMigrationConflictInfo>();
+template <typename MigrationConflictInfoType>
+Status _handleTenantMigrationConflict(OperationContext* opCtx, Status status) {
+ auto migrationConflictInfo = status.extraInfo<MigrationConflictInfoType>();
invariant(migrationConflictInfo);
auto mtab = migrationConflictInfo->getTenantMigrationAccessBlocker();
invariant(mtab);
auto migrationStatus = mtab->waitUntilCommittedOrAborted(opCtx);
mtab->recordTenantMigrationError(migrationStatus);
- uassertStatusOK(migrationStatus);
+ return migrationStatus;
+}
+
+Status handleTenantMigrationConflict(OperationContext* opCtx, Status status) {
+ if (status.code() == ErrorCodes::NonRetryableTenantMigrationConflict) {
+ auto migrationStatus =
+ _handleTenantMigrationConflict<NonRetryableTenantMigrationConflictInfo>(opCtx, status);
+
+ // Some operations, like multi updates, can't safely be automatically retried so we return a
+ // non retryable error instead of TenantMigrationCommitted/TenantMigrationAborted. If
+ // waiting failed for a different reason, e.g. MaxTimeMS expiring, propagate that to the
+ // user unchanged.
+ if (ErrorCodes::isTenantMigrationError(migrationStatus)) {
+ return kNonRetryableTenantMigrationStatus;
+ }
+ return migrationStatus;
+ }
+
+ return _handleTenantMigrationConflict<TenantMigrationConflictInfo>(opCtx, status);
}
void performNoopWrite(OperationContext* opCtx, StringData msg) {
diff --git a/src/mongo/db/repl/tenant_migration_access_blocker_util.h b/src/mongo/db/repl/tenant_migration_access_blocker_util.h
index f5d9a36b89e..06890310015 100644
--- a/src/mongo/db/repl/tenant_migration_access_blocker_util.h
+++ b/src/mongo/db/repl/tenant_migration_access_blocker_util.h
@@ -93,9 +93,10 @@ void recoverTenantMigrationAccessBlockers(OperationContext* opCtx);
/**
* Blocks until the migration commits or aborts, then returns TenantMigrationCommitted or
- * TenantMigrationAborted.
+ * TenantMigrationAborted, or a non-retryable error if the given status is
+ * NonRetryableTenantMigrationConflict.
*/
-void handleTenantMigrationConflict(OperationContext* opCtx, Status status);
+Status handleTenantMigrationConflict(OperationContext* opCtx, Status status);
/**
* Appends a no-op to the oplog.
diff --git a/src/mongo/db/repl/tenant_migration_conflict_info.cpp b/src/mongo/db/repl/tenant_migration_conflict_info.cpp
index 913eea2aa6e..a53c2a6edda 100644
--- a/src/mongo/db/repl/tenant_migration_conflict_info.cpp
+++ b/src/mongo/db/repl/tenant_migration_conflict_info.cpp
@@ -38,17 +38,18 @@ namespace mongo {
namespace {
MONGO_INIT_REGISTER_ERROR_EXTRA_INFO(TenantMigrationConflictInfo);
+MONGO_INIT_REGISTER_ERROR_EXTRA_INFO(NonRetryableTenantMigrationConflictInfo);
constexpr StringData kTenantIdFieldName = "tenantId"_sd;
} // namespace
-void TenantMigrationConflictInfo::serialize(BSONObjBuilder* bob) const {
+void TenantMigrationConflictInfoBase::serialize(BSONObjBuilder* bob) const {
bob->append(kTenantIdFieldName, _tenantId);
}
-std::shared_ptr<const ErrorExtraInfo> TenantMigrationConflictInfo::parse(const BSONObj& obj) {
- return std::make_shared<TenantMigrationConflictInfo>(obj[kTenantIdFieldName].String());
+std::shared_ptr<const ErrorExtraInfo> TenantMigrationConflictInfoBase::parse(const BSONObj& obj) {
+ return std::make_shared<TenantMigrationConflictInfoBase>(obj[kTenantIdFieldName].String());
}
} // namespace mongo
diff --git a/src/mongo/db/repl/tenant_migration_conflict_info.h b/src/mongo/db/repl/tenant_migration_conflict_info.h
index 4f9c9c087e1..9c5b126e32c 100644
--- a/src/mongo/db/repl/tenant_migration_conflict_info.h
+++ b/src/mongo/db/repl/tenant_migration_conflict_info.h
@@ -36,12 +36,14 @@
namespace mongo {
-class TenantMigrationConflictInfo final : public ErrorExtraInfo {
-public:
- static constexpr auto code = ErrorCodes::TenantMigrationConflict;
+const Status kNonRetryableTenantMigrationStatus(
+ ErrorCodes::Interrupted,
+ "Operation interrupted by an internal data migration and could not be automatically retried");
- TenantMigrationConflictInfo(const std::string tenantId,
- std::shared_ptr<TenantMigrationAccessBlocker> mtab = nullptr)
+class TenantMigrationConflictInfoBase : public ErrorExtraInfo {
+public:
+ TenantMigrationConflictInfoBase(const std::string tenantId,
+ std::shared_ptr<TenantMigrationAccessBlocker> mtab = nullptr)
: _tenantId(std::move(tenantId)), _mtab(std::move(mtab)){};
const auto& getTenantId() const {
@@ -59,7 +61,25 @@ private:
std::string _tenantId;
std::shared_ptr<TenantMigrationAccessBlocker> _mtab;
};
-using TenantMigrationConflictException = ExceptionFor<ErrorCodes::TenantMigrationConflict>;
+
+class TenantMigrationConflictInfo final : public TenantMigrationConflictInfoBase {
+public:
+ static constexpr auto code = ErrorCodes::TenantMigrationConflict;
+
+ TenantMigrationConflictInfo(const std::string tenantId,
+ std::shared_ptr<TenantMigrationAccessBlocker> mtab = nullptr)
+ : TenantMigrationConflictInfoBase(std::move(tenantId), std::move(mtab)) {}
+};
+
+class NonRetryableTenantMigrationConflictInfo final : public TenantMigrationConflictInfoBase {
+public:
+ static constexpr auto code = ErrorCodes::NonRetryableTenantMigrationConflict;
+
+ NonRetryableTenantMigrationConflictInfo(
+ const std::string tenantId, std::shared_ptr<TenantMigrationAccessBlocker> mtab = nullptr)
+ : TenantMigrationConflictInfoBase(std::move(tenantId), std::move(mtab)) {}
+};
+
using TenantMigrationCommittedException = ExceptionFor<ErrorCodes::TenantMigrationCommitted>;
} // namespace mongo
diff --git a/src/mongo/db/repl/tenant_migration_donor_access_blocker.cpp b/src/mongo/db/repl/tenant_migration_donor_access_blocker.cpp
index 0b25fb1a9d3..6cff13ad468 100644
--- a/src/mongo/db/repl/tenant_migration_donor_access_blocker.cpp
+++ b/src/mongo/db/repl/tenant_migration_donor_access_blocker.cpp
@@ -129,8 +129,9 @@ Status TenantMigrationDonorAccessBlocker::waitUntilCommittedOrAborted(OperationC
return status;
} else if (idx == 1) {
// Deadline finished first, return error
- return Status(opCtx->getTimeoutError(),
- "Operation timed out waiting for tenant migration to commit or abort");
+ return Status(
+ opCtx->getTimeoutError(),
+ "Operation timed out waiting for an internal data migration to commit or abort");
}
MONGO_UNREACHABLE;
}
diff --git a/src/mongo/db/service_entry_point_common.cpp b/src/mongo/db/service_entry_point_common.cpp
index ae999a37ff0..68b1bdebe04 100644
--- a/src/mongo/db/service_entry_point_common.cpp
+++ b/src/mongo/db/service_entry_point_common.cpp
@@ -768,9 +768,9 @@ Future<void> InvokeCommand::run() {
.get(execContext->getOpCtx());
return runCommandInvocation(_ecd->getExecutionContext(), _ecd->getInvocation());
})
- .onError<ErrorCodes::TenantMigrationConflict>([this](Status status) {
- tenant_migration_access_blocker::handleTenantMigrationConflict(
- _ecd->getExecutionContext()->getOpCtx(), std::move(status));
+ .onErrorCategory<ErrorCategory::TenantMigrationConflictError>([this](Status status) {
+ uassertStatusOK(tenant_migration_access_blocker::handleTenantMigrationConflict(
+ _ecd->getExecutionContext()->getOpCtx(), std::move(status)));
return Status::OK();
});
}
@@ -786,13 +786,13 @@ Future<void> CheckoutSessionAndInvokeCommand::run() {
.get(execContext->getOpCtx());
return runCommandInvocation(_ecd->getExecutionContext(), _ecd->getInvocation());
})
- .onError<ErrorCodes::TenantMigrationConflict>([this](Status status) {
+ .onErrorCategory<ErrorCategory::TenantMigrationConflictError>([this](Status status) {
// Abort transaction and clean up transaction resources before blocking the
// command to allow the stable timestamp on the node to advance.
_cleanupTransaction();
- tenant_migration_access_blocker::handleTenantMigrationConflict(
- _ecd->getExecutionContext()->getOpCtx(), std::move(status));
+ uassertStatusOK(tenant_migration_access_blocker::handleTenantMigrationConflict(
+ _ecd->getExecutionContext()->getOpCtx(), std::move(status)));
})
.tapError([this](Status status) { _tapError(status); })
.then([this] { return _commitInvocation(); });