summary | refs | log | tree | commit | diff
path: root/src/mongo/s
diff options
context:
space:
mode:
author Blake Oler <blake.oler@mongodb.com> 2020-02-11 18:23:49 +0000
committer evergreen <evergreen@mongodb.com> 2020-02-11 18:23:49 +0000
commit 8efa8a3dbe512d8f192248dbd9ecbd984d18bce2 (patch)
tree eb9834cb605c91b3d684d8141b80b5dcb9c6bf5c /src/mongo/s
parent 911a1e74137060f712e20ec9342492fbe69b5e49 (diff)
download mongo-8efa8a3dbe512d8f192248dbd9ecbd984d18bce2.tar.gz
SERVER-45981 Prevent duplicating action upon receiving notice that a given shard is stale
Diffstat (limited to 'src/mongo/s')
-rw-r--r-- src/mongo/s/SConscript 1
-rw-r--r-- src/mongo/s/chunk_manager.cpp 3
-rw-r--r-- src/mongo/s/commands/strategy.cpp 80
-rw-r--r-- src/mongo/s/query/cluster_find.cpp 24
-rw-r--r-- src/mongo/s/shard_invalidated_for_targeting_exception.cpp 59
-rw-r--r-- src/mongo/s/shard_invalidated_for_targeting_exception.h 73
6 files changed, 219 insertions, 21 deletions
diff --git a/src/mongo/s/SConscript b/src/mongo/s/SConscript
index 4d154810bd0..40e5544df80 100644
--- a/src/mongo/s/SConscript
+++ b/src/mongo/s/SConscript
@@ -175,6 +175,7 @@ env.Library(
'chunk_version.cpp',
'database_version_helpers.cpp',
'shard_id.cpp',
+ 'shard_invalidated_for_targeting_exception.cpp',
'stale_exception.cpp',
'would_change_owning_shard_exception.cpp',
env.Idlc('catalog/type_chunk_base.idl')[0],
diff --git a/src/mongo/s/chunk_manager.cpp b/src/mongo/s/chunk_manager.cpp
index e4df0035aa3..6b3973846fb 100644
--- a/src/mongo/s/chunk_manager.cpp
+++ b/src/mongo/s/chunk_manager.cpp
@@ -42,6 +42,7 @@
#include "mongo/db/query/query_planner_common.h"
#include "mongo/db/storage/key_string.h"
#include "mongo/s/chunk_writes_tracker.h"
+#include "mongo/s/shard_invalidated_for_targeting_exception.h"
#include "mongo/util/log.h"
namespace mongo {
@@ -409,7 +410,7 @@ ChunkVersion RoutingTableHistory::getVersion(const ShardId& shardName) const {
return ChunkVersion(0, 0, _collectionVersion.epoch());
}
- uassert(StaleConfigInfo(_nss, {}, {}, shardName),
+ uassert(ShardInvalidatedForTargetingInfo(_nss),
"shard has been marked stale",
!it->second.isStale.load());
diff --git a/src/mongo/s/commands/strategy.cpp b/src/mongo/s/commands/strategy.cpp
index e0e2221ce3c..17b6aa0bc14 100644
--- a/src/mongo/s/commands/strategy.cpp
+++ b/src/mongo/s/commands/strategy.cpp
@@ -77,6 +77,7 @@
#include "mongo/s/query/cluster_cursor_manager.h"
#include "mongo/s/query/cluster_find.h"
#include "mongo/s/session_catalog_router.h"
+#include "mongo/s/shard_invalidated_for_targeting_exception.h"
#include "mongo/s/stale_exception.h"
#include "mongo/s/transaction_router.h"
#include "mongo/util/fail_point.h"
@@ -172,6 +173,7 @@ void invokeInTransactionRouter(OperationContext* opCtx,
} catch (const DBException& e) {
if (ErrorCodes::isSnapshotError(e.code()) ||
ErrorCodes::isNeedRetargettingError(e.code()) ||
+ e.code() == ErrorCodes::ShardInvalidatedForTargeting ||
e.code() == ErrorCodes::StaleDbVersion) {
// Don't abort on possibly retryable errors.
throw;
@@ -549,6 +551,48 @@ void runCommand(OperationContext* opCtx,
}
return;
+ } catch (ShardInvalidatedForTargetingException& ex) {
+ auto catalogCache = Grid::get(opCtx)->catalogCache();
+ catalogCache->setOperationShouldBlockBehindCatalogCacheRefresh(opCtx, true);
+
+ // Retry logic specific to transactions. Throws and aborts the transaction if the
+ // error cannot be retried on.
+ if (auto txnRouter = TransactionRouter::get(opCtx)) {
+ auto abortGuard = makeGuard(
+ [&] { txnRouter.implicitlyAbortTransaction(opCtx, ex.toStatus()); });
+
+ if (!canRetry) {
+ addContextForTransactionAbortingError(txnRouter.txnIdToString(),
+ txnRouter.getLatestStmtId(),
+ ex,
+ "exhausted retries");
+ throw;
+ }
+
+ // TODO SERVER-39704 Allow mongos to retry on stale shard, stale db, snapshot,
+ // or shard invalidated for targeting errors.
+ if (!txnRouter.canContinueOnStaleShardOrDbError(commandName)) {
+ (void)catalogCache->getCollectionRoutingInfoWithRefresh(
+ opCtx, ex.extraInfo<ShardInvalidatedForTargetingInfo>()->getNss());
+ addContextForTransactionAbortingError(
+ txnRouter.txnIdToString(),
+ txnRouter.getLatestStmtId(),
+ ex,
+ "an error from cluster data placement change");
+ throw;
+ }
+
+ // The error is retryable, so update transaction state before retrying.
+ txnRouter.onStaleShardOrDbError(opCtx, commandName, ex.toStatus());
+
+ abortGuard.dismiss();
+ continue;
+ }
+
+ if (canRetry) {
+ continue;
+ }
+ throw;
} catch (ExceptionForCat<ErrorCategory::NeedRetargettingError>& ex) {
const auto staleNs = [&] {
if (auto staleInfo = ex.extraInfo<StaleConfigInfo>()) {
@@ -569,24 +613,24 @@ void runCommand(OperationContext* opCtx,
ShardConnection::checkMyConnectionVersions(opCtx, staleNs.ns());
}
+ auto catalogCache = Grid::get(opCtx)->catalogCache();
+
if (auto staleInfo = ex.extraInfo<StaleConfigInfo>()) {
- Grid::get(opCtx)
- ->catalogCache()
- ->invalidateShardOrEntireCollectionEntryForShardedCollection(
- opCtx,
- staleNs,
- staleInfo->getVersionWanted(),
- staleInfo->getVersionReceived(),
- staleInfo->getShardId());
+ catalogCache->invalidateShardOrEntireCollectionEntryForShardedCollection(
+ opCtx,
+ staleNs,
+ staleInfo->getVersionWanted(),
+ staleInfo->getVersionReceived(),
+ staleInfo->getShardId());
} else {
// If we don't have the stale config info and therefore don't know the shard's
// id, we have to force all further targetting requests for the namespace to
// block on a refresh.
- Grid::get(opCtx)->catalogCache()->onEpochChange(staleNs);
+ catalogCache->onEpochChange(staleNs);
}
- Grid::get(opCtx)->catalogCache()->setOperationShouldBlockBehindCatalogCacheRefresh(
- opCtx, true);
+
+ catalogCache->setOperationShouldBlockBehindCatalogCacheRefresh(opCtx, true);
// Retry logic specific to transactions. Throws and aborts the transaction if the
// error cannot be retried on.
@@ -602,6 +646,8 @@ void runCommand(OperationContext* opCtx,
throw;
}
+ // TODO SERVER-39704 Allow mongos to retry on stale shard, stale db, snapshot,
+ // or shard invalidated for targeting errors.
if (!txnRouter.canContinueOnStaleShardOrDbError(commandName)) {
addContextForTransactionAbortingError(
txnRouter.txnIdToString(),
@@ -641,6 +687,8 @@ void runCommand(OperationContext* opCtx,
throw;
}
+ // TODO SERVER-39704 Allow mongos to retry on stale shard, stale db, snapshot,
+ // or shard invalidated for targeting errors.
if (!txnRouter.canContinueOnStaleShardOrDbError(commandName)) {
addContextForTransactionAbortingError(
txnRouter.txnIdToString(),
@@ -678,6 +726,8 @@ void runCommand(OperationContext* opCtx,
throw;
}
+ // TODO SERVER-39704 Allow mongos to retry on stale shard, stale db, snapshot,
+ // or shard invalidated for targeting errors.
if (!txnRouter.canContinueOnSnapshotError()) {
addContextForTransactionAbortingError(txnRouter.txnIdToString(),
txnRouter.getLatestStmtId(),
@@ -1079,6 +1129,14 @@ void Strategy::explainFind(OperationContext* opCtx,
qr.getCollation());
millisElapsed = timer.millis();
break;
+ } catch (ExceptionFor<ErrorCodes::ShardInvalidatedForTargeting>&) {
+ Grid::get(opCtx)->catalogCache()->setOperationShouldBlockBehindCatalogCacheRefresh(
+ opCtx, true);
+
+ if (canRetry) {
+ continue;
+ }
+ throw;
} catch (const ExceptionForCat<ErrorCategory::NeedRetargettingError>& ex) {
const auto staleNs = [&] {
if (auto staleInfo = ex.extraInfo<StaleConfigInfo>()) {
diff --git a/src/mongo/s/query/cluster_find.cpp b/src/mongo/s/query/cluster_find.cpp
index ba071ef4bde..74e1840ccac 100644
--- a/src/mongo/s/query/cluster_find.cpp
+++ b/src/mongo/s/query/cluster_find.cpp
@@ -564,6 +564,7 @@ CursorId ClusterFind::runQuery(OperationContext* opCtx,
<< "Failed to run query after " << kMaxRetries << " retries");
throw;
} else if (!ErrorCodes::isStaleShardVersionError(ex.code()) &&
+ ex.code() != ErrorCodes::ShardInvalidatedForTargeting &&
ex.code() != ErrorCodes::ShardNotFound) {
// Errors other than stale metadata or from trying to reach a non existent shard are
// fatal to the operation. Network errors and replication retries happen at the
@@ -575,21 +576,26 @@ CursorId ClusterFind::runQuery(OperationContext* opCtx,
LOG(1) << "Received error status for query " << redact(query.toStringShort())
<< " on attempt " << retries << " of " << kMaxRetries << ": " << redact(ex);
- if (auto staleInfo = ex.extraInfo<StaleConfigInfo>()) {
- catalogCache->invalidateShardOrEntireCollectionEntryForShardedCollection(
- opCtx,
- query.nss(),
- staleInfo->getVersionWanted(),
- staleInfo->getVersionReceived(),
- staleInfo->getShardId());
- } else {
- catalogCache->onEpochChange(query.nss());
+ if (ex.code() != ErrorCodes::ShardInvalidatedForTargeting) {
+ if (auto staleInfo = ex.extraInfo<StaleConfigInfo>()) {
+ catalogCache->invalidateShardOrEntireCollectionEntryForShardedCollection(
+ opCtx,
+ query.nss(),
+ staleInfo->getVersionWanted(),
+ staleInfo->getVersionReceived(),
+ staleInfo->getShardId());
+ } else {
+ catalogCache->onEpochChange(query.nss());
+ }
}
catalogCache->setOperationShouldBlockBehindCatalogCacheRefresh(opCtx, true);
if (auto txnRouter = TransactionRouter::get(opCtx)) {
if (!txnRouter.canContinueOnStaleShardOrDbError(kFindCmdName)) {
+ if (ex.code() == ErrorCodes::ShardInvalidatedForTargeting) {
+ (void)catalogCache->getCollectionRoutingInfoWithRefresh(opCtx, query.nss());
+ }
throw;
}
diff --git a/src/mongo/s/shard_invalidated_for_targeting_exception.cpp b/src/mongo/s/shard_invalidated_for_targeting_exception.cpp
new file mode 100644
index 00000000000..52a52dfef14
--- /dev/null
+++ b/src/mongo/s/shard_invalidated_for_targeting_exception.cpp
@@ -0,0 +1,59 @@
+/**
+ * Copyright (C) 2020-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/s/shard_invalidated_for_targeting_exception.h"
+
+#include "mongo/base/init.h"
+#include "mongo/util/assert_util.h"
+
+namespace mongo {
+namespace {
+
+MONGO_INIT_REGISTER_ERROR_EXTRA_INFO(ShardInvalidatedForTargetingInfo);
+
+constexpr StringData kNss = "nss"_sd;
+
+} // namespace
+
+/**
+ * Appends this error's extra info to 'bob': one field, named by kNss, whose value is the
+ * string returned by _nss.ns().
+ */
+void ShardInvalidatedForTargetingInfo::serialize(BSONObjBuilder* bob) const {
+    const auto& namespaceString = _nss.ns();
+    bob->append(kNss, namespaceString);
+}
+
+/**
+ * Parses 'obj' via parseFromCommandError() and returns the result as a heap-allocated,
+ * type-erased ErrorExtraInfo.
+ */
+std::shared_ptr<const ErrorExtraInfo> ShardInvalidatedForTargetingInfo::parse(const BSONObj& obj) {
+    std::shared_ptr<const ErrorExtraInfo> extraInfo =
+        std::make_shared<ShardInvalidatedForTargetingInfo>(parseFromCommandError(obj));
+    return extraInfo;
+}
+
+/**
+ * Builds a ShardInvalidatedForTargetingInfo from 'obj' by reading the string field named by
+ * kNss, i.e. the same field that serialize() writes.
+ *
+ * NOTE(review): BSONElement::String() is expected to throw if the field is missing or is not a
+ * string, so malformed input surfaces as an exception rather than an empty namespace -- confirm
+ * against bsonelement.h.
+ */
+ShardInvalidatedForTargetingInfo ShardInvalidatedForTargetingInfo::parseFromCommandError(
+    const BSONObj& obj) {
+    // Look the field up through the shared kNss constant instead of repeating the "nss" literal,
+    // so the parser cannot drift from the field name used by the serializer.
+    return ShardInvalidatedForTargetingInfo(NamespaceString(obj[kNss].String()));
+}
+
+} // namespace mongo
diff --git a/src/mongo/s/shard_invalidated_for_targeting_exception.h b/src/mongo/s/shard_invalidated_for_targeting_exception.h
new file mode 100644
index 00000000000..16d848cd372
--- /dev/null
+++ b/src/mongo/s/shard_invalidated_for_targeting_exception.h
@@ -0,0 +1,73 @@
+/**
+ * Copyright (C) 2020-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include <utility>
+
+#include "mongo/base/error_extra_info.h"
+#include "mongo/bson/bsonobj.h"
+#include "mongo/bson/bsonobjbuilder.h"
+#include "mongo/db/namespace_string.h"
+
+namespace mongo {
+
+/**
+ * This error is thrown when a stale shard is found when attempting to retrieve a shard's shard
+ * version for a namespace. The router (mongos) will retry a command when encountering this error,
+ * and will block on a catalog cache refresh.
+ *
+ * The only payload carried by this extra info is the namespace supplied by the thrower.
+ *
+ * TODO SERVER-39704 Allow transactions to retry upon receiving a ShardInvalidatedForTargeting
+ * error.
+ */
+class ShardInvalidatedForTargetingInfo final : public ErrorExtraInfo {
+public:
+    // Error code that this extra info type is associated with.
+    static constexpr auto code = ErrorCodes::ShardInvalidatedForTargeting;
+
+    /**
+     * Stores 'nss' as the namespace reported by this error. The argument is taken by value and
+     * moved into the member, so callers passing a temporary do not pay for an extra copy.
+     */
+    explicit ShardInvalidatedForTargetingInfo(NamespaceString nss) : _nss(std::move(nss)) {}
+
+    /**
+     * Returns a reference to the stored namespace; valid for as long as this object is alive.
+     */
+    const auto& getNss() const {
+        return _nss;
+    }
+
+    /**
+     * Convenience wrapper that runs serialize() into a fresh builder and returns the resulting
+     * object.
+     */
+    BSONObj toBSON() const {
+        BSONObjBuilder bob;
+        serialize(&bob);
+        return bob.obj();
+    }
+
+    // Appends this info's fields to 'bob'.
+    void serialize(BSONObjBuilder* bob) const override;
+
+    // Parses an object into a type-erased extra info instance.
+    static std::shared_ptr<const ErrorExtraInfo> parse(const BSONObj&);
+
+    // Parses an object into a concrete ShardInvalidatedForTargetingInfo value.
+    static ShardInvalidatedForTargetingInfo parseFromCommandError(const BSONObj& commandError);
+
+private:
+    // Namespace supplied when the error was created.
+    NamespaceString _nss;
+};
+using ShardInvalidatedForTargetingException =
+ ExceptionFor<ErrorCodes::ShardInvalidatedForTargeting>;
+
+} // namespace mongo