summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKaloian Manassiev <kaloian.manassiev@mongodb.com>2023-04-10 14:54:22 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2023-04-24 09:27:35 +0000
commit47f0809077f85846db4db9e246ad3e8c77720bb6 (patch)
tree42923a3830eb0620532db941c5e0108a157c147f
parent80e26a1ea2d51b8a3c8ffbd5b38bbbc3e6bca131 (diff)
downloadmongo-47f0809077f85846db4db9e246ad3e8c77720bb6.tar.gz
SERVER-74380 Make the router loop not invariant on unexpected namespaces
-rw-r--r--jstests/sharding/mongos_validate_writes.js4
-rw-r--r--jstests/sharding/query/lookup_graph_lookup_foreign_becomes_sharded.js6
-rw-r--r--jstests/sharding/query/lookup_mongod_unaware.js63
-rw-r--r--src/mongo/db/commands.cpp2
-rw-r--r--src/mongo/db/commands/dbcommands_d.cpp2
-rw-r--r--src/mongo/db/exec/write_stage_common.h2
-rw-r--r--src/mongo/db/pipeline/dispatch_shard_pipeline_test.cpp2
-rw-r--r--src/mongo/db/pipeline/process_interface/shardsvr_process_interface.cpp15
-rw-r--r--src/mongo/db/pipeline/sharded_agg_helpers.cpp5
-rw-r--r--src/mongo/db/s/collection_sharding_state.h6
-rw-r--r--src/mongo/db/s/migration_source_manager.h4
-rw-r--r--src/mongo/db/s/move_primary_source_manager.h6
-rw-r--r--src/mongo/db/s/resharding/resharding_collection_cloner.cpp3
-rw-r--r--src/mongo/db/s/resharding/resharding_data_copy_util.h4
-rw-r--r--src/mongo/db/s/resharding/resharding_oplog_batch_applier.cpp6
-rw-r--r--src/mongo/db/s/shard_filtering_metadata_refresh.h4
-rw-r--r--src/mongo/rpc/get_status_from_command_result.cpp2
-rw-r--r--src/mongo/rpc/legacy_reply_builder.h2
-rw-r--r--src/mongo/s/SConscript4
-rw-r--r--src/mongo/s/cluster_commands_helpers.h9
-rw-r--r--src/mongo/s/commands/strategy.h4
-rw-r--r--src/mongo/s/router_role.cpp (renamed from src/mongo/s/router.cpp)90
-rw-r--r--src/mongo/s/router_role.h (renamed from src/mongo/s/router.h)0
-rw-r--r--src/mongo/s/stale_exception.cpp23
-rw-r--r--src/mongo/s/stale_exception.h23
-rw-r--r--src/mongo/s/stale_exception_test.cpp20
26 files changed, 204 insertions, 107 deletions
diff --git a/jstests/sharding/mongos_validate_writes.js b/jstests/sharding/mongos_validate_writes.js
index d52fe988aa9..8e76c4ea526 100644
--- a/jstests/sharding/mongos_validate_writes.js
+++ b/jstests/sharding/mongos_validate_writes.js
@@ -24,8 +24,8 @@ st.ensurePrimaryShard(coll.getDB().getName(), st.shard1.shardName);
coll.createIndex({a: 1});
// Shard the collection on {a: 1} and move one chunk to another shard. Updates need to be across
-// two shards to trigger an error, otherwise they are versioned and will succeed after raising
-// a StaleConfigException.
+// two shards to trigger an error, otherwise they are versioned and will succeed after raising a
+// StaleConfig error.
st.shardColl(coll, {a: 1}, {a: 0}, {a: 1}, coll.getDB(), true);
// Let the stale mongos see the collection state
diff --git a/jstests/sharding/query/lookup_graph_lookup_foreign_becomes_sharded.js b/jstests/sharding/query/lookup_graph_lookup_foreign_becomes_sharded.js
index 393ae5fa890..676510229f1 100644
--- a/jstests/sharding/query/lookup_graph_lookup_foreign_becomes_sharded.js
+++ b/jstests/sharding/query/lookup_graph_lookup_foreign_becomes_sharded.js
@@ -174,9 +174,9 @@ for (let testCase of testCases) {
const newStaleConfigErrorCount = assert.commandWorked(primaryDB.runCommand({serverStatus: 1}))
.shardingStatistics.countStaleConfigErrors;
-// ... and a single StaleConfig exception for the foreign namespace. These StalecConfig errors are
-// not always reported in the profiler, but they are reflected in the serverStatus StaleConfig error
-// count.
+// ... and a single StaleConfig error for the foreign namespace. These StaleConfig errors are not
+// always reported in the profiler, but they are reflected in the serverStatus StaleConfig error
+// counter.
assert.gt(
newStaleConfigErrorCount, prevStaleConfigErrorCount, "StaleConfig errors must have happened");
st.stop();
diff --git a/jstests/sharding/query/lookup_mongod_unaware.js b/jstests/sharding/query/lookup_mongod_unaware.js
index e88bd1c6eb0..ab7292b790f 100644
--- a/jstests/sharding/query/lookup_mongod_unaware.js
+++ b/jstests/sharding/query/lookup_mongod_unaware.js
@@ -6,8 +6,10 @@
* We restart a mongod to cause it to forget that a collection was sharded. When restarted, we
* expect it to still have all the previous data.
*
+ * // TODO (SERVER-74380): Remove requires_fcv_70 once SERVER-74380 has been backported to v6.0
* @tags: [
- * requires_persistence
+ * requires_persistence,
+ * requires_fcv_70
* ]
*
*/
@@ -18,7 +20,7 @@ load("jstests/noPassthrough/libs/server_parameter_helpers.js"); // For setParam
load("jstests/libs/discover_topology.js"); // For findDataBearingNodes.
// Restarts the primary shard and ensures that it believes both collections are unsharded.
-function restartPrimaryShard(rs, localColl, foreignColl) {
+function restartPrimaryShard(rs, ...expectedCollections) {
// Returns true if the shard is aware that the collection is sharded.
function hasRoutingInfoForNs(shardConn, coll) {
const res = shardConn.adminCommand({getShardVersion: coll, fullMetadata: true});
@@ -28,8 +30,11 @@ function restartPrimaryShard(rs, localColl, foreignColl) {
rs.restart(0);
rs.awaitSecondaryNodes();
- assert(!hasRoutingInfoForNs(rs.getPrimary(), localColl.getFullName()));
- assert(!hasRoutingInfoForNs(rs.getPrimary(), foreignColl.getFullName()));
+
+ expectedCollections.forEach(function(coll) {
+ assert(!hasRoutingInfoForNs(rs.getPrimary(), coll.getFullName()),
+ 'Shard role not cleared for ' + coll.getFullName());
+ });
}
// Disable checking for index consistency to ensure that the config server doesn't trigger a
@@ -162,5 +167,55 @@ assert.eq(mongos0LocalColl.aggregate(pipeline).toArray(), expectedResults);
restartPrimaryShard(st.rs0, mongos0LocalColl, mongos0ForeignColl);
assert.eq(mongos1LocalColl.aggregate(pipeline).toArray(), expectedResults);
+//
+// Test two-level $lookup with a stale shard handles the shard role recovery case.
+//
+jsTest.log("Running two-level $lookup with a shard that needs recovery");
+
+assert.commandWorked(st.s0.adminCommand({enableSharding: 'D'}));
+st.ensurePrimaryShard('D', st.shard0.shardName);
+
+const D = st.s0.getDB('D');
+
+assert.commandWorked(D.A.insert({Key: 1, Value: 1}));
+assert.commandWorked(D.B.insert({Key: 1, Value: 1}));
+assert.commandWorked(D.C.insert({Key: 1, Value: 1}));
+assert.commandWorked(D.D.insert({Key: 1, Value: 1}));
+
+const aggPipeline = [{
+ $lookup: {
+ from: 'B',
+ localField: 'Key',
+ foreignField: 'Value',
+ as: 'Joined',
+ pipeline: [
+ {
+ $lookup: {
+ from: 'C',
+ localField: 'Key',
+ foreignField: 'Value',
+ as: 'Joined',
+ }
+ },
+ {
+ $lookup: {
+ from: 'D',
+ localField: 'Key',
+ foreignField: 'Value',
+ as: 'Joined',
+ }
+ },
+ ],
+ }
+}];
+
+const resultBefore = D.A.aggregate(aggPipeline).toArray();
+
+// Restarting the shard primary in order for the shard role's cache to be cleared
+restartPrimaryShard(st.rs0, D.A, D.B, D.C, D.D);
+
+const resultAfter = D.A.aggregate(aggPipeline).toArray();
+assert.eq(resultBefore, resultAfter, "Before and after results do not match");
+
st.stop();
})();
diff --git a/src/mongo/db/commands.cpp b/src/mongo/db/commands.cpp
index b633d8d1e3f..edea422af8f 100644
--- a/src/mongo/db/commands.cpp
+++ b/src/mongo/db/commands.cpp
@@ -149,7 +149,7 @@ BSONObj CommandHelpers::runCommandDirectly(OperationContext* opCtx, const OpMsgR
invocation->run(opCtx, &replyBuilder);
auto body = replyBuilder.getBodyBuilder();
CommandHelpers::extractOrAppendOk(body);
- } catch (const StaleConfigException&) {
+ } catch (const ExceptionFor<ErrorCodes::StaleConfig>&) {
// These exceptions are intended to be handled at a higher level.
throw;
} catch (const DBException& ex) {
diff --git a/src/mongo/db/commands/dbcommands_d.cpp b/src/mongo/db/commands/dbcommands_d.cpp
index d847b4b9c3d..18e38c8326c 100644
--- a/src/mongo/db/commands/dbcommands_d.cpp
+++ b/src/mongo/db/commands/dbcommands_d.cpp
@@ -366,7 +366,7 @@ public:
try {
// RELOCKED
ctx.reset(new AutoGetCollectionForReadCommand(opCtx, nss));
- } catch (const StaleConfigException&) {
+ } catch (const ExceptionFor<ErrorCodes::StaleConfig>&) {
LOGV2_DEBUG(
20453,
1,
diff --git a/src/mongo/db/exec/write_stage_common.h b/src/mongo/db/exec/write_stage_common.h
index bb1a1aa92cf..67521481532 100644
--- a/src/mongo/db/exec/write_stage_common.h
+++ b/src/mongo/db/exec/write_stage_common.h
@@ -93,7 +93,7 @@ public:
}
/**
- * Checks if the 'doc' is NOT writable and additionally handles the StaleConfig exception. This
+ * Checks if the 'doc' is NOT writable and additionally handles the StaleConfig error. This
* method should be called in a context of single update / delete.
*
* Returns a pair of [optional immediate StageState return code, bool fromMigrate].
diff --git a/src/mongo/db/pipeline/dispatch_shard_pipeline_test.cpp b/src/mongo/db/pipeline/dispatch_shard_pipeline_test.cpp
index b7333014a14..35cee95de4f 100644
--- a/src/mongo/db/pipeline/dispatch_shard_pipeline_test.cpp
+++ b/src/mongo/db/pipeline/dispatch_shard_pipeline_test.cpp
@@ -30,7 +30,7 @@
#include "mongo/db/pipeline/aggregation_request_helper.h"
#include "mongo/db/pipeline/sharded_agg_helpers.h"
#include "mongo/s/query/sharded_agg_test_fixture.h"
-#include "mongo/s/router.h"
+#include "mongo/s/router_role.h"
namespace mongo {
namespace {
diff --git a/src/mongo/db/pipeline/process_interface/shardsvr_process_interface.cpp b/src/mongo/db/pipeline/process_interface/shardsvr_process_interface.cpp
index 952de4b1116..2ee63b79152 100644
--- a/src/mongo/db/pipeline/process_interface/shardsvr_process_interface.cpp
+++ b/src/mongo/db/pipeline/process_interface/shardsvr_process_interface.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include "mongo/db/pipeline/process_interface/shardsvr_process_interface.h"
#include <fmt/format.h>
@@ -50,12 +47,11 @@
#include "mongo/s/cluster_commands_helpers.h"
#include "mongo/s/cluster_write.h"
#include "mongo/s/query/document_source_merge_cursors.h"
-#include "mongo/s/router.h"
+#include "mongo/s/router_role.h"
#include "mongo/s/stale_shard_version_helpers.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery
-
namespace mongo {
using namespace fmt::literals;
@@ -243,8 +239,9 @@ BSONObj ShardServerProcessInterface::getCollectionOptions(OperationContext* opCt
auto optionObj = optionsElement.Obj();
// If the BSON object has field 'info' and the BSON element 'info' has field 'uuid',
- // then extract the uuid and add to the BSON object to be return. This will ensure that
- // the BSON object is complaint with the BSON object returned for non-sharded namespace.
+ // then extract the uuid and add to the BSON object to be return. This will ensure
+ // that the BSON object is complaint with the BSON object returned for non-sharded
+ // namespace.
if (auto infoElement = bsonObj["info"]; infoElement && infoElement["uuid"]) {
return optionObj.addField(infoElement["uuid"]);
}
@@ -264,8 +261,8 @@ BSONObj ShardServerProcessInterface::getCollectionOptions(OperationContext* opCt
std::list<BSONObj> ShardServerProcessInterface::getIndexSpecs(OperationContext* opCtx,
const NamespaceString& ns,
bool includeBuildUUIDs) {
- // Note that 'ns' must be an unsharded collection. The indexes for a sharded collection must be
- // read from a shard with a chunk instead of the primary shard.
+ // Note that 'ns' must be an unsharded collection. The indexes for a sharded collection must
+ // be read from a shard with a chunk instead of the primary shard.
auto cachedDbInfo =
uassertStatusOK(Grid::get(opCtx)->catalogCache()->getDatabase(opCtx, ns.db()));
auto shard = uassertStatusOK(
diff --git a/src/mongo/db/pipeline/sharded_agg_helpers.cpp b/src/mongo/db/pipeline/sharded_agg_helpers.cpp
index aca3354a9b4..bf29d0c5e18 100644
--- a/src/mongo/db/pipeline/sharded_agg_helpers.cpp
+++ b/src/mongo/db/pipeline/sharded_agg_helpers.cpp
@@ -61,8 +61,7 @@
#include "mongo/s/query/document_source_merge_cursors.h"
#include "mongo/s/query/establish_cursors.h"
#include "mongo/s/query_analysis_sampler_util.h"
-#include "mongo/s/router.h"
-#include "mongo/s/stale_exception.h"
+#include "mongo/s/router_role.h"
#include "mongo/s/transaction_router.h"
#include "mongo/util/fail_point.h"
#include "mongo/util/overloaded_visitor.h"
@@ -1183,7 +1182,7 @@ DispatchShardPipelineResults dispatchShardPipeline(
targetedSampleId,
ReadPreferenceSetting::get(opCtx));
- } catch (const StaleConfigException& e) {
+ } catch (const ExceptionFor<ErrorCodes::StaleConfig>& e) {
// Check to see if the command failed because of a stale shard version or something
// else.
auto staleInfo = e.extraInfo<StaleConfigInfo>();
diff --git a/src/mongo/db/s/collection_sharding_state.h b/src/mongo/db/s/collection_sharding_state.h
index d01c3802e90..3b9ef19b7cf 100644
--- a/src/mongo/db/s/collection_sharding_state.h
+++ b/src/mongo/db/s/collection_sharding_state.h
@@ -120,7 +120,7 @@ public:
/**
* If the shard currently doesn't know whether the collection is sharded or not, it will throw a
- * StaleConfig exception.
+ * StaleConfig error.
*
* If the request doesn't have a shard version all collections will be treated as UNSHARDED.
*
@@ -137,7 +137,7 @@ public:
*
* If the shard currently doesn't know whether the collection is sharded or not, or if the
* expected shard version doesn't match with the one in the OperationShardingState, it will
- * throw a StaleConfig exception.
+ * throw a StaleConfig error.
*
* If the operation context contains an 'atClusterTime', the returned filtering object will be
* tied to a specific point in time. Otherwise, it will reference the latest cluster time
@@ -168,7 +168,7 @@ public:
/**
* Checks whether the shard version in the operation context is compatible with the shard
- * version of the collection and if not, throws StaleConfigException populated with the received
+ * version of the collection and if not, throws StaleConfig error populated with the received
* and wanted versions.
*
* If the request is not versioned all collections will be treated as UNSHARDED.
diff --git a/src/mongo/db/s/migration_source_manager.h b/src/mongo/db/s/migration_source_manager.h
index 35a1c407e6b..3baf05e40c6 100644
--- a/src/mongo/db/s/migration_source_manager.h
+++ b/src/mongo/db/s/migration_source_manager.h
@@ -95,8 +95,8 @@ public:
*
* May throw any exception. Known exceptions are:
* - InvalidOptions if the operation context is missing shard version
- * - StaleConfigException if the expected collection version does not match what we find it
- * to be after acquiring the distributed lock.
+ * - StaleConfig if the expected placement version does not match what we find it to be after
+ * acquiring the distributed lock
*/
MigrationSourceManager(OperationContext* opCtx,
ShardsvrMoveRange&& request,
diff --git a/src/mongo/db/s/move_primary_source_manager.h b/src/mongo/db/s/move_primary_source_manager.h
index c4de0f2254f..2b87aa02c8c 100644
--- a/src/mongo/db/s/move_primary_source_manager.h
+++ b/src/mongo/db/s/move_primary_source_manager.h
@@ -72,10 +72,10 @@ public:
* Instantiates a new movePrimary source manager. Must be called with the distributed lock
* acquired in advance (not asserted).
*
- * May throw any exception. Known exceptions (TODO) are:
+ * May throw any exception. Known exceptions are:
* - InvalidOptions if the operation context is missing database version
- * - StaleConfigException if the expected database version does not match what we find it
- * to be after acquiring the distributed lock.
+ * - StaleConfig if the expected database version does not match what we find it to be after
+ * acquiring the distributed lock
*/
MovePrimarySourceManager(OperationContext* opCtx,
ShardMovePrimary requestArgs,
diff --git a/src/mongo/db/s/resharding/resharding_collection_cloner.cpp b/src/mongo/db/s/resharding/resharding_collection_cloner.cpp
index ff5f03d3a35..f4749deff37 100644
--- a/src/mongo/db/s/resharding/resharding_collection_cloner.cpp
+++ b/src/mongo/db/s/resharding/resharding_collection_cloner.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include "mongo/db/s/resharding/resharding_collection_cloner.h"
#include <utility>
diff --git a/src/mongo/db/s/resharding/resharding_data_copy_util.h b/src/mongo/db/s/resharding/resharding_data_copy_util.h
index bf3f21a7118..7ed2b0b2aae 100644
--- a/src/mongo/db/s/resharding/resharding_data_copy_util.h
+++ b/src/mongo/db/s/resharding/resharding_data_copy_util.h
@@ -149,8 +149,8 @@ void updateSessionRecord(OperationContext* opCtx,
/**
* Calls and returns the value from the supplied lambda function.
*
- * If a StaleConfig exception is thrown during its execution, then this function will attempt to
- * refresh the collection and invoke the supplied lambda function a second time.
+ * If a StaleConfig error is thrown during its execution, then this function will attempt to refresh
+ * the collection and invoke the supplied lambda function a second time.
*/
template <typename Callable>
auto withOneStaleConfigRetry(OperationContext* opCtx, Callable&& callable) {
diff --git a/src/mongo/db/s/resharding/resharding_oplog_batch_applier.cpp b/src/mongo/db/s/resharding/resharding_oplog_batch_applier.cpp
index fb5ddd671f6..5bb0690e909 100644
--- a/src/mongo/db/s/resharding/resharding_oplog_batch_applier.cpp
+++ b/src/mongo/db/s/resharding/resharding_oplog_batch_applier.cpp
@@ -27,13 +27,8 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include "mongo/db/s/resharding/resharding_oplog_batch_applier.h"
-#include <memory>
-
#include "mongo/db/s/operation_sharding_state.h"
#include "mongo/db/s/resharding/resharding_data_copy_util.h"
#include "mongo/db/s/resharding/resharding_future_util.h"
@@ -43,7 +38,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kResharding
-
namespace mongo {
ReshardingOplogBatchApplier::ReshardingOplogBatchApplier(
diff --git a/src/mongo/db/s/shard_filtering_metadata_refresh.h b/src/mongo/db/s/shard_filtering_metadata_refresh.h
index 6da07eb6d8b..7e01f6c684b 100644
--- a/src/mongo/db/s/shard_filtering_metadata_refresh.h
+++ b/src/mongo/db/s/shard_filtering_metadata_refresh.h
@@ -39,8 +39,8 @@ namespace mongo {
class OperationContext;
/**
- * Must be invoked whenever code, which is executing on a shard encounters a StaleConfig exception
- * and should be passed the placement version from the 'version received' in the exception. If the
+ * Must be invoked whenever code, which is executing on a shard encounters a StaleConfig error and
+ * should be passed the placement version from the 'version received' in the exception. If the
* shard's current placement version is behind 'chunkVersionReceived', causes the shard's filtering
* metadata to be refreshed from the config server, otherwise does nothing and immediately returns.
* If there are other threads currently performing refresh, blocks so that only one of them hits the
diff --git a/src/mongo/rpc/get_status_from_command_result.cpp b/src/mongo/rpc/get_status_from_command_result.cpp
index 2607ff15e3c..315d4ef3135 100644
--- a/src/mongo/rpc/get_status_from_command_result.cpp
+++ b/src/mongo/rpc/get_status_from_command_result.cpp
@@ -50,7 +50,7 @@ Status getStatusFromCommandResult(const BSONObj& result) {
BSONElement codeElement = result["code"];
BSONElement errmsgElement = result["errmsg"];
- // StaleConfigException doesn't pass "ok" in legacy servers
+ // StaleConfig doesn't pass "ok" in legacy servers
BSONElement dollarErrElement = result["$err"];
if (okElement.eoo() && dollarErrElement.eoo()) {
diff --git a/src/mongo/rpc/legacy_reply_builder.h b/src/mongo/rpc/legacy_reply_builder.h
index 45e986ab9ed..22a9c4566f6 100644
--- a/src/mongo/rpc/legacy_reply_builder.h
+++ b/src/mongo/rpc/legacy_reply_builder.h
@@ -49,7 +49,7 @@ public:
LegacyReplyBuilder(Message&&);
~LegacyReplyBuilder() final;
- // Override of setCommandReply specifically used to handle StaleConfigException.
+ // Override of setCommandReply specifically used to handle StaleConfig errors
LegacyReplyBuilder& setCommandReply(Status nonOKStatus, BSONObj extraErrorInfo) final;
LegacyReplyBuilder& setRawCommandReply(const BSONObj& commandReply) final;
diff --git a/src/mongo/s/SConscript b/src/mongo/s/SConscript
index 97744139c45..de925d3f271 100644
--- a/src/mongo/s/SConscript
+++ b/src/mongo/s/SConscript
@@ -77,7 +77,7 @@ env.Library(
'multi_statement_transaction_requests_sender.cpp',
'router_transactions_metrics.cpp',
'router_transactions_stats.idl',
- 'router.cpp',
+ 'router_role.cpp',
'session_catalog_router.cpp',
'shard_key_pattern_query_util.cpp',
'stale_shard_version_helpers.cpp',
@@ -87,7 +87,6 @@ env.Library(
LIBDEPS=[
'$BUILD_DIR/mongo/db/commands/txn_cmd_request',
'$BUILD_DIR/mongo/db/pipeline/process_interface/mongo_process_interface',
- '$BUILD_DIR/mongo/db/query/op_metrics',
'$BUILD_DIR/mongo/db/repl/read_concern_args',
'$BUILD_DIR/mongo/db/session/logical_session_id_helpers',
'$BUILD_DIR/mongo/db/session/session_catalog',
@@ -100,6 +99,7 @@ env.Library(
'$BUILD_DIR/mongo/db/catalog/collection_uuid_mismatch_info',
'$BUILD_DIR/mongo/db/internal_transactions_feature_flag',
'$BUILD_DIR/mongo/db/mongohasher',
+ '$BUILD_DIR/mongo/db/query/op_metrics',
'$BUILD_DIR/mongo/db/query/query_planner',
'$BUILD_DIR/mongo/db/session/sessions_collection',
],
diff --git a/src/mongo/s/cluster_commands_helpers.h b/src/mongo/s/cluster_commands_helpers.h
index c6a860a06fe..549e91b529c 100644
--- a/src/mongo/s/cluster_commands_helpers.h
+++ b/src/mongo/s/cluster_commands_helpers.h
@@ -86,7 +86,8 @@ boost::intrusive_ptr<ExpressionContext> makeExpressionContextWithDefaultsForTarg
* Dispatches all the specified requests in parallel and waits until all complete, returning a
* vector of the same size and positions as that of 'requests'.
*
- * Throws StaleConfigException if any remote returns a stale shardVersion error.
+ * Throws StaleConfig if any of the remotes returns that error, regardless of what the other errors
+ * are.
*/
std::vector<AsyncRequestsSender::Response> gatherResponses(
OperationContext* opCtx,
@@ -170,7 +171,7 @@ std::vector<AsyncRequestsSender::Response> scatterGatherUnversionedTargetAllShar
* the collection has query sampling enabled and the rate-limited sampler successfully generates a
* sample id for it.
*
- * Does not retry on StaleConfigException.
+ * Does not retry on StaleConfig errors.
*/
[[nodiscard]] std::vector<AsyncRequestsSender::Response> scatterGatherVersionedTargetByRoutingTable(
OperationContext* opCtx,
@@ -191,7 +192,7 @@ std::vector<AsyncRequestsSender::Response> scatterGatherUnversionedTargetAllShar
*
* Callers can specify shards to skip, even if these shards would be otherwise targeted.
*
- * Allows StaleConfigException errors to append to the response list.
+ * Allows StaleConfig errors to append to the response list.
*/
std::vector<AsyncRequestsSender::Response>
scatterGatherVersionedTargetByRoutingTableNoThrowOnStaleShardVersionErrors(
@@ -222,7 +223,7 @@ AsyncRequestsSender::Response executeCommandAgainstDatabasePrimary(
* Utility for dispatching commands against the shard with the MinKey chunk for the namespace and
* attaching the appropriate shard version.
*
- * Does not retry on StaleConfigException.
+ * Does not retry on StaleConfig errors.
*/
AsyncRequestsSender::Response executeCommandAgainstShardWithMinKeyChunk(
OperationContext* opCtx,
diff --git a/src/mongo/s/commands/strategy.h b/src/mongo/s/commands/strategy.h
index 1e04130f657..73916229f7f 100644
--- a/src/mongo/s/commands/strategy.h
+++ b/src/mongo/s/commands/strategy.h
@@ -41,8 +41,8 @@ public:
/**
* Executes a command from either OP_QUERY or OP_MSG wire protocols.
*
- * Catches StaleConfigException errors and retries the command automatically after refreshing
- * the metadata for the failing namespace.
+ * Catches StaleConfig errors and retries the command automatically after refreshing the
+ * metadata for the failing namespace.
*/
static Future<DbResponse> clientCommand(std::shared_ptr<RequestExecutionContext> rec);
};
diff --git a/src/mongo/s/router.cpp b/src/mongo/s/router_role.cpp
index 180c9766bee..ad2e47f2b64 100644
--- a/src/mongo/s/router.cpp
+++ b/src/mongo/s/router_role.cpp
@@ -28,7 +28,7 @@
*/
-#include "mongo/s/router.h"
+#include "mongo/s/router_role.h"
#include "mongo/logv2/log.h"
#include "mongo/s/grid.h"
@@ -36,7 +36,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
-
namespace mongo {
namespace sharding {
namespace router {
@@ -71,32 +70,34 @@ CachedDatabaseInfo DBPrimaryRouter::_getRoutingInfo(OperationContext* opCtx) con
}
void DBPrimaryRouter::_onException(RouteContext* context, Status s) {
- if (++context->numAttempts > kMaxNumStaleVersionRetries) {
- uassertStatusOKWithContext(
- s,
- str::stream() << "Exceeded maximum number of " << kMaxNumStaleVersionRetries
- << " retries attempting \'" << context->comment << "\'");
- } else {
- LOGV2_DEBUG(637590,
- 3,
- "Retrying {description}. Got error: {status}",
- "description"_attr = context->comment,
- "status"_attr = s);
- }
-
auto catalogCache = Grid::get(_service)->catalogCache();
if (s == ErrorCodes::StaleDbVersion) {
auto si = s.extraInfo<StaleDbRoutingVersion>();
- invariant(si);
- invariant(si->getDb() == _db,
- str::stream() << "StaleDbVersion on unexpected database. Expected " << _db
- << ", received " << si->getDb());
+ tassert(6375900, "StaleDbVersion must have extraInfo", si);
+ tassert(6375901,
+ str::stream() << "StaleDbVersion on unexpected database. Expected " << _db
+ << ", received " << si->getDb(),
+ si->getDb() == _db);
catalogCache->onStaleDatabaseVersion(si->getDb(), si->getVersionWanted());
} else {
uassertStatusOK(s);
}
+
+ if (++context->numAttempts > kMaxNumStaleVersionRetries) {
+ uassertStatusOKWithContext(
+ s,
+ str::stream() << "Exceeded maximum number of " << kMaxNumStaleVersionRetries
+ << " retries attempting \'" << context->comment << "\'");
+ } else {
+ LOGV2_DEBUG(6375902,
+ 3,
+ "Retrying database primary routing operation",
+ "attempt"_attr = context->numAttempts,
+ "comment"_attr = context->comment,
+ "status"_attr = s);
+ }
}
CollectionRouter::CollectionRouter(ServiceContext* service, NamespaceString nss)
@@ -127,42 +128,39 @@ CollectionRoutingInfo CollectionRouter::_getRoutingInfo(OperationContext* opCtx)
}
void CollectionRouter::_onException(RouteContext* context, Status s) {
+ auto catalogCache = Grid::get(_service)->catalogCache();
+
+ if (s == ErrorCodes::StaleDbVersion) {
+ auto si = s.extraInfo<StaleDbRoutingVersion>();
+ tassert(6375903, "StaleDbVersion must have extraInfo", si);
+ catalogCache->onStaleDatabaseVersion(si->getDb(), si->getVersionWanted());
+ } else if (s == ErrorCodes::StaleConfig) {
+ auto si = s.extraInfo<StaleConfigInfo>();
+ tassert(6375904, "StaleConfig must have extraInfo", si);
+ catalogCache->invalidateShardOrEntireCollectionEntryForShardedCollection(
+ si->getNss(), si->getVersionWanted(), si->getShardId());
+ } else if (s == ErrorCodes::StaleEpoch) {
+ auto si = s.extraInfo<StaleEpochInfo>();
+ tassert(6375905, "StaleEpoch must have extra info", si);
+ catalogCache->invalidateShardOrEntireCollectionEntryForShardedCollection(
+ si->getNss(), si->getVersionWanted(), ShardId());
+ } else {
+ uassertStatusOK(s);
+ }
+
if (++context->numAttempts > kMaxNumStaleVersionRetries) {
uassertStatusOKWithContext(
s,
str::stream() << "Exceeded maximum number of " << kMaxNumStaleVersionRetries
<< " retries attempting \'" << context->comment << "\'");
} else {
- LOGV2_DEBUG(637591,
+ LOGV2_DEBUG(6375906,
3,
- "Retrying {description}. Got error: {status}",
- "description"_attr = context->comment,
+ "Retrying collection routing operation",
+ "attempt"_attr = context->numAttempts,
+ "comment"_attr = context->comment,
"status"_attr = s);
}
-
- auto catalogCache = Grid::get(_service)->catalogCache();
-
- if (s.isA<ErrorCategory::StaleShardVersionError>()) {
- if (auto si = s.extraInfo<StaleConfigInfo>()) {
- invariant(si->getNss() == _nss,
- str::stream() << "StaleConfig on unexpected namespace. Expected " << _nss
- << ", received " << si->getNss());
- catalogCache->invalidateShardOrEntireCollectionEntryForShardedCollection(
- _nss, si->getVersionWanted(), si->getShardId());
- } else {
- catalogCache->invalidateCollectionEntry_LINEARIZABLE(_nss);
- }
- } else if (s == ErrorCodes::StaleDbVersion) {
- auto si = s.extraInfo<StaleDbRoutingVersion>();
- invariant(si);
- invariant(si->getDb() == _nss.db(),
- str::stream() << "StaleDbVersion on unexpected database. Expected " << _nss.db()
- << ", received " << si->getDb());
-
- catalogCache->onStaleDatabaseVersion(si->getDb(), si->getVersionWanted());
- } else {
- uassertStatusOK(s);
- }
}
} // namespace router
diff --git a/src/mongo/s/router.h b/src/mongo/s/router_role.h
index 2004f377aa8..2004f377aa8 100644
--- a/src/mongo/s/router.h
+++ b/src/mongo/s/router_role.h
diff --git a/src/mongo/s/stale_exception.cpp b/src/mongo/s/stale_exception.cpp
index c9e66b5ee14..7c2e0531054 100644
--- a/src/mongo/s/stale_exception.cpp
+++ b/src/mongo/s/stale_exception.cpp
@@ -69,10 +69,31 @@ std::shared_ptr<const ErrorExtraInfo> StaleConfigInfo::parse(const BSONObj& obj)
void StaleEpochInfo::serialize(BSONObjBuilder* bob) const {
bob->append("ns", _nss.ns());
+ if (_received)
+ _received->serialize("vReceived", bob);
+ if (_wanted)
+ _wanted->serialize("vWanted", bob);
}
std::shared_ptr<const ErrorExtraInfo> StaleEpochInfo::parse(const BSONObj& obj) {
- return std::make_shared<StaleEpochInfo>(NamespaceString(obj["ns"].String()));
+ boost::optional<ShardVersion> received;
+ if (auto vReceivedElem = obj["vReceived"])
+ received = ShardVersion::parse(vReceivedElem);
+
+ boost::optional<ShardVersion> wanted;
+ if (auto vWantedElem = obj["vWanted"])
+ wanted = ShardVersion::parse(vWantedElem);
+
+ uassert(6375907,
+ str::stream() << "Either both vReceived (" << received << ")"
+ << " and vWanted (" << wanted << ") must be present or none",
+ received.is_initialized() == wanted.is_initialized());
+
+ if (received)
+ return std::make_shared<StaleEpochInfo>(
+ NamespaceString(obj["ns"].String()), *received, *wanted);
+ else
+ return std::make_shared<StaleEpochInfo>(NamespaceString(obj["ns"].String()));
}
void StaleDbRoutingVersion::serialize(BSONObjBuilder* bob) const {
diff --git a/src/mongo/s/stale_exception.h b/src/mongo/s/stale_exception.h
index da34725af68..3da30934803 100644
--- a/src/mongo/s/stale_exception.h
+++ b/src/mongo/s/stale_exception.h
@@ -82,7 +82,7 @@ public:
void serialize(BSONObjBuilder* bob) const;
static std::shared_ptr<const ErrorExtraInfo> parse(const BSONObj& obj);
-protected:
+private:
NamespaceString _nss;
ShardVersion _received;
boost::optional<ShardVersion> _wanted;
@@ -93,24 +93,41 @@ protected:
boost::optional<OperationType> _duringOperationType;
};
+// TODO (SERVER-74380): Rename the StaleEpoch code to StaleDownstreamRouter and the info to
+// StaleDownstreamRouterInfo
class StaleEpochInfo final : public ErrorExtraInfo {
public:
static constexpr auto code = ErrorCodes::StaleEpoch;
+ StaleEpochInfo(NamespaceString nss, ShardVersion received, ShardVersion wanted)
+ : _nss(std::move(nss)), _received(received), _wanted(wanted) {}
+
+ // TODO (SERVER-74380): Remove this constructor
StaleEpochInfo(NamespaceString nss) : _nss(std::move(nss)) {}
const auto& getNss() const {
return _nss;
}
+ const auto& getVersionReceived() const {
+ return _received;
+ }
+
+ const auto& getVersionWanted() const {
+ return _wanted;
+ }
+
void serialize(BSONObjBuilder* bob) const;
static std::shared_ptr<const ErrorExtraInfo> parse(const BSONObj& obj);
private:
NamespaceString _nss;
-};
-using StaleConfigException = ExceptionFor<ErrorCodes::StaleConfig>;
+ // TODO (SERVER-74380): These two fields are boost::optional for backwards compatibility. Either
+ // both of them are boost::none or both are set.
+ boost::optional<ShardVersion> _received;
+ boost::optional<ShardVersion> _wanted;
+};
class StaleDbRoutingVersion final : public ErrorExtraInfo {
public:
diff --git a/src/mongo/s/stale_exception_test.cpp b/src/mongo/s/stale_exception_test.cpp
index 1e92988516d..a91f52edd8f 100644
--- a/src/mongo/s/stale_exception_test.cpp
+++ b/src/mongo/s/stale_exception_test.cpp
@@ -54,7 +54,7 @@ TEST(StaleExceptionTest, StaleConfigInfoSerializationTest) {
ASSERT_EQUALS(deserializedInfo->getShardId(), kShardId);
}
-TEST(StaleExceptionTest, StaleEpochInfoSerializationTest) {
+TEST(StaleExceptionTest, StaleEpochInfoLegacySerializationTest) {
StaleEpochInfo info(kNss);
// Serialize
@@ -66,6 +66,24 @@ TEST(StaleExceptionTest, StaleEpochInfoSerializationTest) {
std::static_pointer_cast<const StaleEpochInfo>(StaleEpochInfo::parse(bob.obj()));
ASSERT_EQUALS(deserializedInfo->getNss(), kNss);
+ ASSERT(!deserializedInfo->getVersionReceived());
+ ASSERT(!deserializedInfo->getVersionWanted());
+}
+
+TEST(StaleExceptionTest, StaleEpochInfoSerializationTest) {
+ StaleEpochInfo info(kNss, ShardVersion::UNSHARDED(), ShardVersion::UNSHARDED());
+
+ // Serialize
+ BSONObjBuilder bob;
+ info.serialize(&bob);
+
+ // Deserialize
+ auto deserializedInfo =
+ std::static_pointer_cast<const StaleEpochInfo>(StaleEpochInfo::parse(bob.obj()));
+
+ ASSERT_EQ(deserializedInfo->getNss(), kNss);
+ ASSERT_EQ(deserializedInfo->getVersionReceived(), ShardVersion::UNSHARDED());
+ ASSERT_EQ(deserializedInfo->getVersionWanted(), ShardVersion::UNSHARDED());
}
} // namespace