diff options
author | Blake Oler <blake.oler@mongodb.com> | 2020-02-11 18:23:49 +0000 |
---|---|---|
committer | evergreen <evergreen@mongodb.com> | 2020-02-11 18:23:49 +0000 |
commit | 8efa8a3dbe512d8f192248dbd9ecbd984d18bce2 (patch) | |
tree | eb9834cb605c91b3d684d8141b80b5dcb9c6bf5c /src/mongo/s | |
parent | 911a1e74137060f712e20ec9342492fbe69b5e49 (diff) | |
download | mongo-8efa8a3dbe512d8f192248dbd9ecbd984d18bce2.tar.gz |
SERVER-45981 Prevent duplicating action upon receiving notice that a given shard is stale
Diffstat (limited to 'src/mongo/s')
-rw-r--r-- | src/mongo/s/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/s/chunk_manager.cpp | 3 | ||||
-rw-r--r-- | src/mongo/s/commands/strategy.cpp | 80 | ||||
-rw-r--r-- | src/mongo/s/query/cluster_find.cpp | 24 | ||||
-rw-r--r-- | src/mongo/s/shard_invalidated_for_targeting_exception.cpp | 59 | ||||
-rw-r--r-- | src/mongo/s/shard_invalidated_for_targeting_exception.h | 73 |
6 files changed, 219 insertions, 21 deletions
diff --git a/src/mongo/s/SConscript b/src/mongo/s/SConscript index 4d154810bd0..40e5544df80 100644 --- a/src/mongo/s/SConscript +++ b/src/mongo/s/SConscript @@ -175,6 +175,7 @@ env.Library( 'chunk_version.cpp', 'database_version_helpers.cpp', 'shard_id.cpp', + 'shard_invalidated_for_targeting_exception.cpp', 'stale_exception.cpp', 'would_change_owning_shard_exception.cpp', env.Idlc('catalog/type_chunk_base.idl')[0], diff --git a/src/mongo/s/chunk_manager.cpp b/src/mongo/s/chunk_manager.cpp index e4df0035aa3..6b3973846fb 100644 --- a/src/mongo/s/chunk_manager.cpp +++ b/src/mongo/s/chunk_manager.cpp @@ -42,6 +42,7 @@ #include "mongo/db/query/query_planner_common.h" #include "mongo/db/storage/key_string.h" #include "mongo/s/chunk_writes_tracker.h" +#include "mongo/s/shard_invalidated_for_targeting_exception.h" #include "mongo/util/log.h" namespace mongo { @@ -409,7 +410,7 @@ ChunkVersion RoutingTableHistory::getVersion(const ShardId& shardName) const { return ChunkVersion(0, 0, _collectionVersion.epoch()); } - uassert(StaleConfigInfo(_nss, {}, {}, shardName), + uassert(ShardInvalidatedForTargetingInfo(_nss), "shard has been marked stale", !it->second.isStale.load()); diff --git a/src/mongo/s/commands/strategy.cpp b/src/mongo/s/commands/strategy.cpp index e0e2221ce3c..17b6aa0bc14 100644 --- a/src/mongo/s/commands/strategy.cpp +++ b/src/mongo/s/commands/strategy.cpp @@ -77,6 +77,7 @@ #include "mongo/s/query/cluster_cursor_manager.h" #include "mongo/s/query/cluster_find.h" #include "mongo/s/session_catalog_router.h" +#include "mongo/s/shard_invalidated_for_targeting_exception.h" #include "mongo/s/stale_exception.h" #include "mongo/s/transaction_router.h" #include "mongo/util/fail_point.h" @@ -172,6 +173,7 @@ void invokeInTransactionRouter(OperationContext* opCtx, } catch (const DBException& e) { if (ErrorCodes::isSnapshotError(e.code()) || ErrorCodes::isNeedRetargettingError(e.code()) || + e.code() == ErrorCodes::ShardInvalidatedForTargeting || e.code() == ErrorCodes::StaleDbVersion) { // Don't abort on possibly retryable errors. throw; @@ -549,6 +551,48 @@ void runCommand(OperationContext* opCtx, } return; + } catch (ShardInvalidatedForTargetingException& ex) { + auto catalogCache = Grid::get(opCtx)->catalogCache(); + catalogCache->setOperationShouldBlockBehindCatalogCacheRefresh(opCtx, true); + + // Retry logic specific to transactions. Throws and aborts the transaction if the + // error cannot be retried on. + if (auto txnRouter = TransactionRouter::get(opCtx)) { + auto abortGuard = makeGuard( + [&] { txnRouter.implicitlyAbortTransaction(opCtx, ex.toStatus()); }); + + if (!canRetry) { + addContextForTransactionAbortingError(txnRouter.txnIdToString(), + txnRouter.getLatestStmtId(), + ex, + "exhausted retries"); + throw; + } + + // TODO SERVER-39704 Allow mongos to retry on stale shard, stale db, snapshot, + // or shard invalidated for targeting errors. + if (!txnRouter.canContinueOnStaleShardOrDbError(commandName)) { + (void)catalogCache->getCollectionRoutingInfoWithRefresh( + opCtx, ex.extraInfo<ShardInvalidatedForTargetingInfo>()->getNss()); + addContextForTransactionAbortingError( + txnRouter.txnIdToString(), + txnRouter.getLatestStmtId(), + ex, + "an error from cluster data placement change"); + throw; + } + + // The error is retryable, so update transaction state before retrying. + txnRouter.onStaleShardOrDbError(opCtx, commandName, ex.toStatus()); + + abortGuard.dismiss(); + continue; + } + + if (canRetry) { + continue; + } + throw; } catch (ExceptionForCat<ErrorCategory::NeedRetargettingError>& ex) { const auto staleNs = [&] { if (auto staleInfo = ex.extraInfo<StaleConfigInfo>()) { @@ -569,24 +613,24 @@ void runCommand(OperationContext* opCtx, ShardConnection::checkMyConnectionVersions(opCtx, staleNs.ns()); } + auto catalogCache = Grid::get(opCtx)->catalogCache(); + if (auto staleInfo = ex.extraInfo<StaleConfigInfo>()) { - Grid::get(opCtx) - ->catalogCache() - ->invalidateShardOrEntireCollectionEntryForShardedCollection( - opCtx, - staleNs, - staleInfo->getVersionWanted(), - staleInfo->getVersionReceived(), - staleInfo->getShardId()); + catalogCache->invalidateShardOrEntireCollectionEntryForShardedCollection( + opCtx, + staleNs, + staleInfo->getVersionWanted(), + staleInfo->getVersionReceived(), + staleInfo->getShardId()); } else { // If we don't have the stale config info and therefore don't know the shard's // id, we have to force all further targetting requests for the namespace to // block on a refresh. - Grid::get(opCtx)->catalogCache()->onEpochChange(staleNs); + catalogCache->onEpochChange(staleNs); } - Grid::get(opCtx)->catalogCache()->setOperationShouldBlockBehindCatalogCacheRefresh( - opCtx, true); + + catalogCache->setOperationShouldBlockBehindCatalogCacheRefresh(opCtx, true); // Retry logic specific to transactions. Throws and aborts the transaction if the // error cannot be retried on. @@ -602,6 +646,8 @@ void runCommand(OperationContext* opCtx, throw; } + // TODO SERVER-39704 Allow mongos to retry on stale shard, stale db, snapshot, + // or shard invalidated for targeting errors. if (!txnRouter.canContinueOnStaleShardOrDbError(commandName)) { addContextForTransactionAbortingError( txnRouter.txnIdToString(), @@ -641,6 +687,8 @@ void runCommand(OperationContext* opCtx, throw; } + // TODO SERVER-39704 Allow mongos to retry on stale shard, stale db, snapshot, + // or shard invalidated for targeting errors. if (!txnRouter.canContinueOnStaleShardOrDbError(commandName)) { addContextForTransactionAbortingError( txnRouter.txnIdToString(), @@ -678,6 +726,8 @@ void runCommand(OperationContext* opCtx, throw; } + // TODO SERVER-39704 Allow mongos to retry on stale shard, stale db, snapshot, + // or shard invalidated for targeting errors. if (!txnRouter.canContinueOnSnapshotError()) { addContextForTransactionAbortingError(txnRouter.txnIdToString(), txnRouter.getLatestStmtId(), @@ -1079,6 +1129,14 @@ void Strategy::explainFind(OperationContext* opCtx, qr.getCollation()); millisElapsed = timer.millis(); break; + } catch (ExceptionFor<ErrorCodes::ShardInvalidatedForTargeting>&) { + Grid::get(opCtx)->catalogCache()->setOperationShouldBlockBehindCatalogCacheRefresh( + opCtx, true); + + if (canRetry) { + continue; + } + throw; } catch (const ExceptionForCat<ErrorCategory::NeedRetargettingError>& ex) { const auto staleNs = [&] { if (auto staleInfo = ex.extraInfo<StaleConfigInfo>()) { diff --git a/src/mongo/s/query/cluster_find.cpp b/src/mongo/s/query/cluster_find.cpp index ba071ef4bde..74e1840ccac 100644 --- a/src/mongo/s/query/cluster_find.cpp +++ b/src/mongo/s/query/cluster_find.cpp @@ -564,6 +564,7 @@ CursorId ClusterFind::runQuery(OperationContext* opCtx, << "Failed to run query after " << kMaxRetries << " retries"); throw; } else if (!ErrorCodes::isStaleShardVersionError(ex.code()) && + ex.code() != ErrorCodes::ShardInvalidatedForTargeting && ex.code() != ErrorCodes::ShardNotFound) { // Errors other than stale metadata or from trying to reach a non existent shard are // fatal to the operation. Network errors and replication retries happen at the @@ -575,21 +576,26 @@ CursorId ClusterFind::runQuery(OperationContext* opCtx, LOG(1) << "Received error status for query " << redact(query.toStringShort()) << " on attempt " << retries << " of " << kMaxRetries << ": " << redact(ex); - if (auto staleInfo = ex.extraInfo<StaleConfigInfo>()) { - catalogCache->invalidateShardOrEntireCollectionEntryForShardedCollection( - opCtx, - query.nss(), - staleInfo->getVersionWanted(), - staleInfo->getVersionReceived(), - staleInfo->getShardId()); - } else { - catalogCache->onEpochChange(query.nss()); + if (ex.code() != ErrorCodes::ShardInvalidatedForTargeting) { + if (auto staleInfo = ex.extraInfo<StaleConfigInfo>()) { + catalogCache->invalidateShardOrEntireCollectionEntryForShardedCollection( + opCtx, + query.nss(), + staleInfo->getVersionWanted(), + staleInfo->getVersionReceived(), + staleInfo->getShardId()); + } else { + catalogCache->onEpochChange(query.nss()); + } } catalogCache->setOperationShouldBlockBehindCatalogCacheRefresh(opCtx, true); if (auto txnRouter = TransactionRouter::get(opCtx)) { if (!txnRouter.canContinueOnStaleShardOrDbError(kFindCmdName)) { + if (ex.code() == ErrorCodes::ShardInvalidatedForTargeting) { + (void)catalogCache->getCollectionRoutingInfoWithRefresh(opCtx, query.nss()); + } throw; } diff --git a/src/mongo/s/shard_invalidated_for_targeting_exception.cpp b/src/mongo/s/shard_invalidated_for_targeting_exception.cpp new file mode 100644 index 00000000000..52a52dfef14 --- /dev/null +++ b/src/mongo/s/shard_invalidated_for_targeting_exception.cpp @@ -0,0 +1,59 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/s/shard_invalidated_for_targeting_exception.h" + +#include "mongo/base/init.h" +#include "mongo/util/assert_util.h" + +namespace mongo { +namespace { + +MONGO_INIT_REGISTER_ERROR_EXTRA_INFO(ShardInvalidatedForTargetingInfo); + +constexpr StringData kNss = "nss"_sd; + +} // namespace + +void ShardInvalidatedForTargetingInfo::serialize(BSONObjBuilder* bob) const { + bob->append(kNss, _nss.ns()); +} + +std::shared_ptr<const ErrorExtraInfo> ShardInvalidatedForTargetingInfo::parse(const BSONObj& obj) { + return std::make_shared<ShardInvalidatedForTargetingInfo>(parseFromCommandError(obj)); +} + +ShardInvalidatedForTargetingInfo ShardInvalidatedForTargetingInfo::parseFromCommandError( + const BSONObj& obj) { + return ShardInvalidatedForTargetingInfo(NamespaceString(obj["nss"].String())); +} + +} // namespace mongo diff --git a/src/mongo/s/shard_invalidated_for_targeting_exception.h b/src/mongo/s/shard_invalidated_for_targeting_exception.h new file mode 100644 index 00000000000..16d848cd372 --- /dev/null +++ b/src/mongo/s/shard_invalidated_for_targeting_exception.h @@ -0,0 +1,73 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/base/error_extra_info.h" +#include "mongo/bson/bsonobj.h" +#include "mongo/bson/bsonobjbuilder.h" +#include "mongo/db/namespace_string.h" + +namespace mongo { + +/** + * This error is thrown when a stale shard is found when attempting to retrieve a shard's shard + * version for a namespace. The router (mongos) will retry a command when encountering this error, + * and will block on a catalog cache refresh. + * + * TODO SERVER-39704 Allow transactions to retry upon receiving a ShardInvalidatedForTargeting + * error. + */ +class ShardInvalidatedForTargetingInfo final : public ErrorExtraInfo { +public: + static constexpr auto code = ErrorCodes::ShardInvalidatedForTargeting; + + explicit ShardInvalidatedForTargetingInfo(NamespaceString nss) : _nss(nss){}; + + const auto& getNss() const { + return _nss; + } + + BSONObj toBSON() const { + BSONObjBuilder bob; + serialize(&bob); + return bob.obj(); + } + + void serialize(BSONObjBuilder* bob) const override; + static std::shared_ptr<const ErrorExtraInfo> parse(const BSONObj&); + static ShardInvalidatedForTargetingInfo parseFromCommandError(const BSONObj& commandError); + +private: + NamespaceString _nss; +}; +using ShardInvalidatedForTargetingException = + ExceptionFor<ErrorCodes::ShardInvalidatedForTargeting>; + +} // namespace mongo |