summary | refs | log | tree | commit | diff
path: root/src/mongo/db
diff options
context:
space:
mode:
author: Jordi Serra Torrens <jordi.serra-torrens@mongodb.com> 2021-03-23 10:32:39 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-04-06 10:36:03 +0000
commit: 3aa71ec3ef14d5354850e905600aa5cda2fcbba3 (patch)
tree: c86e16a709aafbd65d47294b024d4f2ba209b377 /src/mongo/db
parent: 9f32e927f98cea09100e10e7fd564df725a42deb (diff)
download: mongo-3aa71ec3ef14d5354850e905600aa5cda2fcbba3.tar.gz
SERVER-54020: ShardInvalidatedForTargeting thrown by resharding's getDestinedRecipient() not being retried by mongos
Diffstat (limited to 'src/mongo/db')
-rw-r--r-- src/mongo/db/error_labels.cpp | 1
-rw-r--r-- src/mongo/db/error_labels_test.cpp | 6
-rw-r--r-- src/mongo/db/ops/write_ops_exec.cpp | 4
-rw-r--r-- src/mongo/db/s/resharding_destined_recipient_test.cpp | 11
-rw-r--r-- src/mongo/db/s/resharding_util.cpp | 19
-rw-r--r-- src/mongo/db/service_entry_point_common.cpp | 27
-rw-r--r-- src/mongo/db/service_entry_point_common.h | 4
-rw-r--r-- src/mongo/db/service_entry_point_mongod.cpp | 10
8 files changed, 67 insertions, 15 deletions
diff --git a/src/mongo/db/error_labels.cpp b/src/mongo/db/error_labels.cpp
index bdb6dc65b20..425e67220df 100644
--- a/src/mongo/db/error_labels.cpp
+++ b/src/mongo/db/error_labels.cpp
@@ -175,6 +175,7 @@ bool isTransientTransactionError(ErrorCodes::Error code,
case ErrorCodes::WriteConflict:
case ErrorCodes::LockTimeout:
case ErrorCodes::PreparedTransactionInProgress:
+ case ErrorCodes::ShardCannotRefreshDueToLocksHeld:
case ErrorCodes::ShardInvalidatedForTargeting:
case ErrorCodes::StaleDbVersion:
case ErrorCodes::TenantMigrationAborted:
diff --git a/src/mongo/db/error_labels_test.cpp b/src/mongo/db/error_labels_test.cpp
index 07b597ffaf6..aa7fa88aefe 100644
--- a/src/mongo/db/error_labels_test.cpp
+++ b/src/mongo/db/error_labels_test.cpp
@@ -69,6 +69,12 @@ TEST(IsTransientTransactionErrorTest, TenantMigrationAbortedIsTransient) {
false /* isCommitOrAbort */));
}
+TEST(IsTransientTransactionErrorTest, ShardCannotRefreshDueToLocksHeldIsTransient) {
+ ASSERT_TRUE(isTransientTransactionError(ErrorCodes::ShardCannotRefreshDueToLocksHeld,
+ false /* hasWriteConcernError */,
+ false /* isCommitOrAbort */));
+}
+
TEST(IsTransientTransactionErrorTest, ShardInvalidatedForTargetingIsTransient) {
ASSERT_TRUE(isTransientTransactionError(ErrorCodes::ShardInvalidatedForTargeting,
false /* hasWriteConcernError */,
diff --git a/src/mongo/db/ops/write_ops_exec.cpp b/src/mongo/db/ops/write_ops_exec.cpp
index d36103f03e0..9b3b0a00cde 100644
--- a/src/mongo/db/ops/write_ops_exec.cpp
+++ b/src/mongo/db/ops/write_ops_exec.cpp
@@ -302,6 +302,10 @@ bool handleError(OperationContext* opCtx,
return false;
}
+ if (ex.code() == ErrorCodes::ShardCannotRefreshDueToLocksHeld) {
+ throw;
+ }
+
out->results.emplace_back(ex.toStatus());
return !wholeOp.getOrdered();
}
diff --git a/src/mongo/db/s/resharding_destined_recipient_test.cpp b/src/mongo/db/s/resharding_destined_recipient_test.cpp
index b8a4d7ab845..26eb6ca32d0 100644
--- a/src/mongo/db/s/resharding_destined_recipient_test.cpp
+++ b/src/mongo/db/s/resharding_destined_recipient_test.cpp
@@ -50,6 +50,7 @@
#include "mongo/s/catalog/type_shard.h"
#include "mongo/s/catalog_cache_loader_mock.h"
#include "mongo/s/database_version.h"
+#include "mongo/s/shard_cannot_refresh_due_to_locks_held_exception.h"
#include "mongo/s/shard_id.h"
#include "mongo/unittest/unittest.h"
@@ -308,8 +309,14 @@ TEST_F(DestinedRecipientTest, TestGetDestinedRecipientThrowsOnBlockedRefresh) {
auto collDesc = css->getCollectionDescription(opCtx);
FailPointEnableBlock failPoint("blockCollectionCacheLookup");
- ASSERT_THROWS(getDestinedRecipient(opCtx, kNss, BSON("x" << 2 << "y" << 10), css, collDesc),
- ExceptionFor<ErrorCodes::ShardInvalidatedForTargeting>);
+ ASSERT_THROWS_WITH_CHECK(
+ getDestinedRecipient(opCtx, kNss, BSON("x" << 2 << "y" << 10), css, collDesc),
+ ShardCannotRefreshDueToLocksHeldException,
+ [&](const ShardCannotRefreshDueToLocksHeldException& ex) {
+ const auto refreshInfo = ex.extraInfo<ShardCannotRefreshDueToLocksHeldInfo>();
+ ASSERT(refreshInfo);
+ ASSERT_EQ(refreshInfo->getNss(), env.tempNss);
+ });
}
auto sw = catalogCache()->getCollectionRoutingInfoWithRefresh(opCtx, env.tempNss);
diff --git a/src/mongo/db/s/resharding_util.cpp b/src/mongo/db/s/resharding_util.cpp
index b96711fd30b..9017c5eadb2 100644
--- a/src/mongo/db/s/resharding_util.cpp
+++ b/src/mongo/db/s/resharding_util.cpp
@@ -475,22 +475,15 @@ boost::optional<ShardId> getDestinedRecipient(OperationContext* opCtx,
return boost::none;
bool allowLocks = true;
- auto tempNssRoutingInfo = Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfo(
- opCtx,
- constructTemporaryReshardingNss(sourceNss.db(), getCollectionUuid(opCtx, sourceNss)),
- allowLocks);
-
- uassert(ShardInvalidatedForTargetingInfo(sourceNss),
- "Routing information is not available for the temporary resharding collection.",
- tempNssRoutingInfo.getStatus() != ErrorCodes::StaleShardVersion);
-
- uassertStatusOK(tempNssRoutingInfo);
+ auto tempNssRoutingInfo =
+ uassertStatusOK(Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfo(
+ opCtx,
+ constructTemporaryReshardingNss(sourceNss.db(), getCollectionUuid(opCtx, sourceNss)),
+ allowLocks));
auto shardKey = reshardingKeyPattern->extractShardKeyFromDocThrows(fullDocument);
- return tempNssRoutingInfo.getValue()
- .findIntersectingChunkWithSimpleCollation(shardKey)
- .getShardId();
+ return tempNssRoutingInfo.findIntersectingChunkWithSimpleCollation(shardKey).getShardId();
}
bool isFinalOplog(const repl::OplogEntry& oplog) {
diff --git a/src/mongo/db/service_entry_point_common.cpp b/src/mongo/db/service_entry_point_common.cpp
index 1e1fead879c..ba5528b2416 100644
--- a/src/mongo/db/service_entry_point_common.cpp
+++ b/src/mongo/db/service_entry_point_common.cpp
@@ -99,6 +99,7 @@
#include "mongo/rpc/metadata/tracking_metadata.h"
#include "mongo/rpc/op_msg.h"
#include "mongo/rpc/reply_builder_interface.h"
+#include "mongo/s/shard_cannot_refresh_due_to_locks_held_exception.h"
#include "mongo/transport/hello_metrics.h"
#include "mongo/transport/service_executor.h"
#include "mongo/transport/session.h"
@@ -688,6 +689,7 @@ private:
std::unique_ptr<PolymorphicScoped> _scoped;
bool _refreshedDatabase = false;
bool _refreshedCollection = false;
+ bool _refreshedCatalogCache = false;
};
class RunCommandImpl {
@@ -1663,6 +1665,31 @@ Future<void> ExecCommandDatabase::_commandExec() {
}
return s;
+ })
+ .onError<ErrorCodes::ShardCannotRefreshDueToLocksHeld>([this](Status s) -> Future<void> {
+ // This exception can never happen on the config server. Config servers can't receive
+ // SSV either, because they never have commands with shardVersion sent.
+ invariant(serverGlobalParams.clusterRole != ClusterRole::ConfigServer);
+
+ auto opCtx = _execContext->getOpCtx();
+ if (!opCtx->getClient()->isInDirectClient() && !_refreshedCatalogCache) {
+ invariant(!opCtx->lockState()->isLocked());
+
+ auto refreshInfo = s.extraInfo<ShardCannotRefreshDueToLocksHeldInfo>();
+ invariant(refreshInfo);
+
+ const auto refreshed =
+ _execContext->behaviors->refreshCatalogCache(opCtx, *refreshInfo);
+
+ if (refreshed) {
+ _refreshedCatalogCache = true;
+ if (!opCtx->inMultiDocumentTransaction()) {
+ return _commandExec();
+ }
+ }
+ }
+
+ return s;
});
}
diff --git a/src/mongo/db/service_entry_point_common.h b/src/mongo/db/service_entry_point_common.h
index d3edbacae12..9500752fd4a 100644
--- a/src/mongo/db/service_entry_point_common.h
+++ b/src/mongo/db/service_entry_point_common.h
@@ -90,6 +90,10 @@ struct ServiceEntryPointCommon {
virtual bool refreshCollection(OperationContext* opCtx, const StaleConfigInfo& se) const
noexcept = 0;
+ virtual bool refreshCatalogCache(
+ OperationContext* opCtx, const ShardCannotRefreshDueToLocksHeldInfo& refreshInfo) const
+ noexcept = 0;
+
virtual void advanceConfigOpTimeFromRequestMetadata(OperationContext* opCtx) const = 0;
MONGO_WARN_UNUSED_RESULT_FUNCTION virtual std::unique_ptr<PolymorphicScoped>
diff --git a/src/mongo/db/service_entry_point_mongod.cpp b/src/mongo/db/service_entry_point_mongod.cpp
index 6cc6546376e..436f2c0e442 100644
--- a/src/mongo/db/service_entry_point_mongod.cpp
+++ b/src/mongo/db/service_entry_point_mongod.cpp
@@ -50,6 +50,7 @@
#include "mongo/rpc/metadata/config_server_metadata.h"
#include "mongo/rpc/metadata/sharding_metadata.h"
#include "mongo/s/grid.h"
+#include "mongo/s/shard_cannot_refresh_due_to_locks_held_exception.h"
#include "mongo/s/stale_exception.h"
namespace mongo {
@@ -231,6 +232,15 @@ public:
return onShardVersionMismatchNoExcept(opCtx, se.getNss(), se.getVersionReceived()).isOK();
}
+ bool refreshCatalogCache(OperationContext* opCtx,
+ const ShardCannotRefreshDueToLocksHeldInfo& refreshInfo) const
+ noexcept override {
+ return Grid::get(opCtx)
+ ->catalogCache()
+ ->getCollectionRoutingInfo(opCtx, refreshInfo.getNss())
+ .isOK();
+ }
+
void advanceConfigOpTimeFromRequestMetadata(OperationContext* opCtx) const override {
// Handle config optime information that may have been sent along with the command.
rpc::advanceConfigOpTimeFromRequestMetadata(opCtx);