diff options
author | Kaloian Manassiev <kaloian.manassiev@mongodb.com> | 2023-04-10 14:54:22 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-04-24 09:27:35 +0000 |
commit | 47f0809077f85846db4db9e246ad3e8c77720bb6 (patch) | |
tree | 42923a3830eb0620532db941c5e0108a157c147f | |
parent | 80e26a1ea2d51b8a3c8ffbd5b38bbbc3e6bca131 (diff) | |
download | mongo-47f0809077f85846db4db9e246ad3e8c77720bb6.tar.gz |
SERVER-74380 Make the router loop not invariant on unexpected namespaces
26 files changed, 204 insertions, 107 deletions
diff --git a/jstests/sharding/mongos_validate_writes.js b/jstests/sharding/mongos_validate_writes.js index d52fe988aa9..8e76c4ea526 100644 --- a/jstests/sharding/mongos_validate_writes.js +++ b/jstests/sharding/mongos_validate_writes.js @@ -24,8 +24,8 @@ st.ensurePrimaryShard(coll.getDB().getName(), st.shard1.shardName); coll.createIndex({a: 1}); // Shard the collection on {a: 1} and move one chunk to another shard. Updates need to be across -// two shards to trigger an error, otherwise they are versioned and will succeed after raising -// a StaleConfigException. +// two shards to trigger an error, otherwise they are versioned and will succeed after raising a +// StaleConfig error. st.shardColl(coll, {a: 1}, {a: 0}, {a: 1}, coll.getDB(), true); // Let the stale mongos see the collection state diff --git a/jstests/sharding/query/lookup_graph_lookup_foreign_becomes_sharded.js b/jstests/sharding/query/lookup_graph_lookup_foreign_becomes_sharded.js index 393ae5fa890..676510229f1 100644 --- a/jstests/sharding/query/lookup_graph_lookup_foreign_becomes_sharded.js +++ b/jstests/sharding/query/lookup_graph_lookup_foreign_becomes_sharded.js @@ -174,9 +174,9 @@ for (let testCase of testCases) { const newStaleConfigErrorCount = assert.commandWorked(primaryDB.runCommand({serverStatus: 1})) .shardingStatistics.countStaleConfigErrors; -// ... and a single StaleConfig exception for the foreign namespace. These StalecConfig errors are -// not always reported in the profiler, but they are reflected in the serverStatus StaleConfig error -// count. +// ... and a single StaleConfig error for the foreign namespace. These StaleConfig errors are not +// always reported in the profiler, but they are reflected in the serverStatus StaleConfig error +// counter. assert.gt( newStaleConfigErrorCount, prevStaleConfigErrorCount, "StaleConfig errors must have happened"); st.stop(); diff --git a/jstests/sharding/query/lookup_mongod_unaware.js b/jstests/sharding/query/lookup_mongod_unaware.js index e88bd1c6eb0..ab7292b790f 100644 --- a/jstests/sharding/query/lookup_mongod_unaware.js +++ b/jstests/sharding/query/lookup_mongod_unaware.js @@ -6,8 +6,10 @@ * We restart a mongod to cause it to forget that a collection was sharded. When restarted, we * expect it to still have all the previous data. * + * // TODO (SERVER-74380): Remove requires_fcv_70 once SERVER-74380 has been backported to v6.0 * @tags: [ - * requires_persistence + * requires_persistence, + * requires_fcv_70 * ] * */ @@ -18,7 +20,7 @@ load("jstests/noPassthrough/libs/server_parameter_helpers.js"); // For setParam load("jstests/libs/discover_topology.js"); // For findDataBearingNodes. // Restarts the primary shard and ensures that it believes both collections are unsharded. -function restartPrimaryShard(rs, localColl, foreignColl) { +function restartPrimaryShard(rs, ...expectedCollections) { // Returns true if the shard is aware that the collection is sharded. function hasRoutingInfoForNs(shardConn, coll) { const res = shardConn.adminCommand({getShardVersion: coll, fullMetadata: true}); @@ -28,8 +30,11 @@ function restartPrimaryShard(rs, localColl, foreignColl) { rs.restart(0); rs.awaitSecondaryNodes(); - assert(!hasRoutingInfoForNs(rs.getPrimary(), localColl.getFullName())); - assert(!hasRoutingInfoForNs(rs.getPrimary(), foreignColl.getFullName())); + + expectedCollections.forEach(function(coll) { + assert(!hasRoutingInfoForNs(rs.getPrimary(), coll.getFullName()), + 'Shard role not cleared for ' + coll.getFullName()); + }); } // Disable checking for index consistency to ensure that the config server doesn't trigger a @@ -162,5 +167,55 @@ assert.eq(mongos0LocalColl.aggregate(pipeline).toArray(), expectedResults); restartPrimaryShard(st.rs0, mongos0LocalColl, mongos0ForeignColl); assert.eq(mongos1LocalColl.aggregate(pipeline).toArray(), expectedResults); +// +// Test two-level $lookup with a stale shard handles the shard role recovery case. +// +jsTest.log("Running two-level $lookup with a shard that needs recovery"); + +assert.commandWorked(st.s0.adminCommand({enableSharding: 'D'})); +st.ensurePrimaryShard('D', st.shard0.shardName); + +const D = st.s0.getDB('D'); + +assert.commandWorked(D.A.insert({Key: 1, Value: 1})); +assert.commandWorked(D.B.insert({Key: 1, Value: 1})); +assert.commandWorked(D.C.insert({Key: 1, Value: 1})); +assert.commandWorked(D.D.insert({Key: 1, Value: 1})); + +const aggPipeline = [{ + $lookup: { + from: 'B', + localField: 'Key', + foreignField: 'Value', + as: 'Joined', + pipeline: [ + { + $lookup: { + from: 'C', + localField: 'Key', + foreignField: 'Value', + as: 'Joined', + } + }, + { + $lookup: { + from: 'D', + localField: 'Key', + foreignField: 'Value', + as: 'Joined', + } + }, + ], + } +}]; + +const resultBefore = D.A.aggregate(aggPipeline).toArray(); + +// Restarting the shard primary in order for the shard role's cache to be cleared +restartPrimaryShard(st.rs0, D.A, D.B, D.C, D.D); + +const resultAfter = D.A.aggregate(aggPipeline).toArray(); +assert.eq(resultBefore, resultAfter, "Before and after results do not match"); + st.stop(); })(); diff --git a/src/mongo/db/commands.cpp b/src/mongo/db/commands.cpp index b633d8d1e3f..edea422af8f 100644 --- a/src/mongo/db/commands.cpp +++ b/src/mongo/db/commands.cpp @@ -149,7 +149,7 @@ BSONObj CommandHelpers::runCommandDirectly(OperationContext* opCtx, const OpMsgR invocation->run(opCtx, &replyBuilder); auto body = replyBuilder.getBodyBuilder(); CommandHelpers::extractOrAppendOk(body); - } catch (const StaleConfigException&) { + } catch (const ExceptionFor<ErrorCodes::StaleConfig>&) { // These exceptions are intended to be handled at a higher level. throw; } catch (const DBException& ex) { diff --git a/src/mongo/db/commands/dbcommands_d.cpp b/src/mongo/db/commands/dbcommands_d.cpp index d847b4b9c3d..18e38c8326c 100644 --- a/src/mongo/db/commands/dbcommands_d.cpp +++ b/src/mongo/db/commands/dbcommands_d.cpp @@ -366,7 +366,7 @@ public: try { // RELOCKED ctx.reset(new AutoGetCollectionForReadCommand(opCtx, nss)); - } catch (const StaleConfigException&) { + } catch (const ExceptionFor<ErrorCodes::StaleConfig>&) { LOGV2_DEBUG( 20453, 1, diff --git a/src/mongo/db/exec/write_stage_common.h b/src/mongo/db/exec/write_stage_common.h index bb1a1aa92cf..67521481532 100644 --- a/src/mongo/db/exec/write_stage_common.h +++ b/src/mongo/db/exec/write_stage_common.h @@ -93,7 +93,7 @@ public: } /** - * Checks if the 'doc' is NOT writable and additionally handles the StaleConfig exception. This + * Checks if the 'doc' is NOT writable and additionally handles the StaleConfig error. This * method should be called in a context of single update / delete. * * Returns a pair of [optional immediate StageState return code, bool fromMigrate]. diff --git a/src/mongo/db/pipeline/dispatch_shard_pipeline_test.cpp b/src/mongo/db/pipeline/dispatch_shard_pipeline_test.cpp index b7333014a14..35cee95de4f 100644 --- a/src/mongo/db/pipeline/dispatch_shard_pipeline_test.cpp +++ b/src/mongo/db/pipeline/dispatch_shard_pipeline_test.cpp @@ -30,7 +30,7 @@ #include "mongo/db/pipeline/aggregation_request_helper.h" #include "mongo/db/pipeline/sharded_agg_helpers.h" #include "mongo/s/query/sharded_agg_test_fixture.h" -#include "mongo/s/router.h" +#include "mongo/s/router_role.h" namespace mongo { namespace { diff --git a/src/mongo/db/pipeline/process_interface/shardsvr_process_interface.cpp b/src/mongo/db/pipeline/process_interface/shardsvr_process_interface.cpp index 952de4b1116..2ee63b79152 100644 --- a/src/mongo/db/pipeline/process_interface/shardsvr_process_interface.cpp +++ b/src/mongo/db/pipeline/process_interface/shardsvr_process_interface.cpp @@ -27,9 +27,6 @@ * it in the license file. */ - -#include "mongo/platform/basic.h" - #include "mongo/db/pipeline/process_interface/shardsvr_process_interface.h" #include <fmt/format.h> @@ -50,12 +47,11 @@ #include "mongo/s/cluster_commands_helpers.h" #include "mongo/s/cluster_write.h" #include "mongo/s/query/document_source_merge_cursors.h" -#include "mongo/s/router.h" +#include "mongo/s/router_role.h" #include "mongo/s/stale_shard_version_helpers.h" #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery - namespace mongo { using namespace fmt::literals; @@ -243,8 +239,9 @@ BSONObj ShardServerProcessInterface::getCollectionOptions(OperationContext* opCt auto optionObj = optionsElement.Obj(); // If the BSON object has field 'info' and the BSON element 'info' has field 'uuid', - // then extract the uuid and add to the BSON object to be return. This will ensure that - // the BSON object is complaint with the BSON object returned for non-sharded namespace. + // then extract the uuid and add to the BSON object to be return. This will ensure + // that the BSON object is complaint with the BSON object returned for non-sharded + // namespace. if (auto infoElement = bsonObj["info"]; infoElement && infoElement["uuid"]) { return optionObj.addField(infoElement["uuid"]); } @@ -264,8 +261,8 @@ BSONObj ShardServerProcessInterface::getCollectionOptions(OperationContext* opCt std::list<BSONObj> ShardServerProcessInterface::getIndexSpecs(OperationContext* opCtx, const NamespaceString& ns, bool includeBuildUUIDs) { - // Note that 'ns' must be an unsharded collection. The indexes for a sharded collection must be - // read from a shard with a chunk instead of the primary shard. + // Note that 'ns' must be an unsharded collection. The indexes for a sharded collection must + // be read from a shard with a chunk instead of the primary shard. auto cachedDbInfo = uassertStatusOK(Grid::get(opCtx)->catalogCache()->getDatabase(opCtx, ns.db())); auto shard = uassertStatusOK( diff --git a/src/mongo/db/pipeline/sharded_agg_helpers.cpp b/src/mongo/db/pipeline/sharded_agg_helpers.cpp index aca3354a9b4..bf29d0c5e18 100644 --- a/src/mongo/db/pipeline/sharded_agg_helpers.cpp +++ b/src/mongo/db/pipeline/sharded_agg_helpers.cpp @@ -61,8 +61,7 @@ #include "mongo/s/query/document_source_merge_cursors.h" #include "mongo/s/query/establish_cursors.h" #include "mongo/s/query_analysis_sampler_util.h" -#include "mongo/s/router.h" -#include "mongo/s/stale_exception.h" +#include "mongo/s/router_role.h" #include "mongo/s/transaction_router.h" #include "mongo/util/fail_point.h" #include "mongo/util/overloaded_visitor.h" @@ -1183,7 +1182,7 @@ DispatchShardPipelineResults dispatchShardPipeline( targetedSampleId, ReadPreferenceSetting::get(opCtx)); - } catch (const StaleConfigException& e) { + } catch (const ExceptionFor<ErrorCodes::StaleConfig>& e) { // Check to see if the command failed because of a stale shard version or something // else. auto staleInfo = e.extraInfo<StaleConfigInfo>(); diff --git a/src/mongo/db/s/collection_sharding_state.h b/src/mongo/db/s/collection_sharding_state.h index d01c3802e90..3b9ef19b7cf 100644 --- a/src/mongo/db/s/collection_sharding_state.h +++ b/src/mongo/db/s/collection_sharding_state.h @@ -120,7 +120,7 @@ public: /** * If the shard currently doesn't know whether the collection is sharded or not, it will throw a - * StaleConfig exception. + * StaleConfig error. * * If the request doesn't have a shard version all collections will be treated as UNSHARDED. * @@ -137,7 +137,7 @@ public: * * If the shard currently doesn't know whether the collection is sharded or not, or if the * expected shard version doesn't match with the one in the OperationShardingState, it will - * throw a StaleConfig exception. + * throw a StaleConfig error. * * If the operation context contains an 'atClusterTime', the returned filtering object will be * tied to a specific point in time. Otherwise, it will reference the latest cluster time @@ -168,7 +168,7 @@ public: /** * Checks whether the shard version in the operation context is compatible with the shard - * version of the collection and if not, throws StaleConfigException populated with the received + * version of the collection and if not, throws StaleConfig error populated with the received * and wanted versions. * * If the request is not versioned all collections will be treated as UNSHARDED. diff --git a/src/mongo/db/s/migration_source_manager.h b/src/mongo/db/s/migration_source_manager.h index 35a1c407e6b..3baf05e40c6 100644 --- a/src/mongo/db/s/migration_source_manager.h +++ b/src/mongo/db/s/migration_source_manager.h @@ -95,8 +95,8 @@ public: * * May throw any exception. Known exceptions are: * - InvalidOptions if the operation context is missing shard version - * - StaleConfigException if the expected collection version does not match what we find it - * to be after acquiring the distributed lock. + * - StaleConfig if the expected placement version does not match what we find it to be after + * acquiring the distributed lock */ MigrationSourceManager(OperationContext* opCtx, ShardsvrMoveRange&& request, diff --git a/src/mongo/db/s/move_primary_source_manager.h b/src/mongo/db/s/move_primary_source_manager.h index c4de0f2254f..2b87aa02c8c 100644 --- a/src/mongo/db/s/move_primary_source_manager.h +++ b/src/mongo/db/s/move_primary_source_manager.h @@ -72,10 +72,10 @@ public: * Instantiates a new movePrimary source manager. Must be called with the distributed lock * acquired in advance (not asserted). * - * May throw any exception. Known exceptions (TODO) are: + * May throw any exception. Known exceptions are: * - InvalidOptions if the operation context is missing database version - * - StaleConfigException if the expected database version does not match what we find it - * to be after acquiring the distributed lock. + * - StaleConfig if the expected database version does not match what we find it to be after + * acquiring the distributed lock */ MovePrimarySourceManager(OperationContext* opCtx, ShardMovePrimary requestArgs, diff --git a/src/mongo/db/s/resharding/resharding_collection_cloner.cpp b/src/mongo/db/s/resharding/resharding_collection_cloner.cpp index ff5f03d3a35..f4749deff37 100644 --- a/src/mongo/db/s/resharding/resharding_collection_cloner.cpp +++ b/src/mongo/db/s/resharding/resharding_collection_cloner.cpp @@ -27,9 +27,6 @@ * it in the license file. */ - -#include "mongo/platform/basic.h" - #include "mongo/db/s/resharding/resharding_collection_cloner.h" #include <utility> diff --git a/src/mongo/db/s/resharding/resharding_data_copy_util.h b/src/mongo/db/s/resharding/resharding_data_copy_util.h index bf3f21a7118..7ed2b0b2aae 100644 --- a/src/mongo/db/s/resharding/resharding_data_copy_util.h +++ b/src/mongo/db/s/resharding/resharding_data_copy_util.h @@ -149,8 +149,8 @@ void updateSessionRecord(OperationContext* opCtx, /** * Calls and returns the value from the supplied lambda function. * - * If a StaleConfig exception is thrown during its execution, then this function will attempt to - * refresh the collection and invoke the supplied lambda function a second time. + * If a StaleConfig error is thrown during its execution, then this function will attempt to refresh + * the collection and invoke the supplied lambda function a second time. */ template <typename Callable> auto withOneStaleConfigRetry(OperationContext* opCtx, Callable&& callable) { diff --git a/src/mongo/db/s/resharding/resharding_oplog_batch_applier.cpp b/src/mongo/db/s/resharding/resharding_oplog_batch_applier.cpp index fb5ddd671f6..5bb0690e909 100644 --- a/src/mongo/db/s/resharding/resharding_oplog_batch_applier.cpp +++ b/src/mongo/db/s/resharding/resharding_oplog_batch_applier.cpp @@ -27,13 +27,8 @@ * it in the license file. */ - -#include "mongo/platform/basic.h" - #include "mongo/db/s/resharding/resharding_oplog_batch_applier.h" -#include <memory> - #include "mongo/db/s/operation_sharding_state.h" #include "mongo/db/s/resharding/resharding_data_copy_util.h" #include "mongo/db/s/resharding/resharding_future_util.h" @@ -43,7 +38,6 @@ #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kResharding - namespace mongo { ReshardingOplogBatchApplier::ReshardingOplogBatchApplier( diff --git a/src/mongo/db/s/shard_filtering_metadata_refresh.h b/src/mongo/db/s/shard_filtering_metadata_refresh.h index 6da07eb6d8b..7e01f6c684b 100644 --- a/src/mongo/db/s/shard_filtering_metadata_refresh.h +++ b/src/mongo/db/s/shard_filtering_metadata_refresh.h @@ -39,8 +39,8 @@ namespace mongo { class OperationContext; /** - * Must be invoked whenever code, which is executing on a shard encounters a StaleConfig exception - * and should be passed the placement version from the 'version received' in the exception. If the + * Must be invoked whenever code, which is executing on a shard encounters a StaleConfig error and + * should be passed the placement version from the 'version received' in the exception. If the * shard's current placement version is behind 'chunkVersionReceived', causes the shard's filtering * metadata to be refreshed from the config server, otherwise does nothing and immediately returns. * If there are other threads currently performing refresh, blocks so that only one of them hits the diff --git a/src/mongo/rpc/get_status_from_command_result.cpp b/src/mongo/rpc/get_status_from_command_result.cpp index 2607ff15e3c..315d4ef3135 100644 --- a/src/mongo/rpc/get_status_from_command_result.cpp +++ b/src/mongo/rpc/get_status_from_command_result.cpp @@ -50,7 +50,7 @@ Status getStatusFromCommandResult(const BSONObj& result) { BSONElement codeElement = result["code"]; BSONElement errmsgElement = result["errmsg"]; - // StaleConfigException doesn't pass "ok" in legacy servers + // StaleConfig doesn't pass "ok" in legacy servers BSONElement dollarErrElement = result["$err"]; if (okElement.eoo() && dollarErrElement.eoo()) { diff --git a/src/mongo/rpc/legacy_reply_builder.h b/src/mongo/rpc/legacy_reply_builder.h index 45e986ab9ed..22a9c4566f6 100644 --- a/src/mongo/rpc/legacy_reply_builder.h +++ b/src/mongo/rpc/legacy_reply_builder.h @@ -49,7 +49,7 @@ public: LegacyReplyBuilder(Message&&); ~LegacyReplyBuilder() final; - // Override of setCommandReply specifically used to handle StaleConfigException. + // Override of setCommandReply specifically used to handle StaleConfig errors LegacyReplyBuilder& setCommandReply(Status nonOKStatus, BSONObj extraErrorInfo) final; LegacyReplyBuilder& setRawCommandReply(const BSONObj& commandReply) final; diff --git a/src/mongo/s/SConscript b/src/mongo/s/SConscript index 97744139c45..de925d3f271 100644 --- a/src/mongo/s/SConscript +++ b/src/mongo/s/SConscript @@ -77,7 +77,7 @@ env.Library( 'multi_statement_transaction_requests_sender.cpp', 'router_transactions_metrics.cpp', 'router_transactions_stats.idl', - 'router.cpp', + 'router_role.cpp', 'session_catalog_router.cpp', 'shard_key_pattern_query_util.cpp', 'stale_shard_version_helpers.cpp', @@ -87,7 +87,6 @@ env.Library( LIBDEPS=[ '$BUILD_DIR/mongo/db/commands/txn_cmd_request', '$BUILD_DIR/mongo/db/pipeline/process_interface/mongo_process_interface', - '$BUILD_DIR/mongo/db/query/op_metrics', '$BUILD_DIR/mongo/db/repl/read_concern_args', '$BUILD_DIR/mongo/db/session/logical_session_id_helpers', '$BUILD_DIR/mongo/db/session/session_catalog', @@ -100,6 +99,7 @@ env.Library( '$BUILD_DIR/mongo/db/catalog/collection_uuid_mismatch_info', '$BUILD_DIR/mongo/db/internal_transactions_feature_flag', '$BUILD_DIR/mongo/db/mongohasher', + '$BUILD_DIR/mongo/db/query/op_metrics', '$BUILD_DIR/mongo/db/query/query_planner', '$BUILD_DIR/mongo/db/session/sessions_collection', ], diff --git a/src/mongo/s/cluster_commands_helpers.h b/src/mongo/s/cluster_commands_helpers.h index c6a860a06fe..549e91b529c 100644 --- a/src/mongo/s/cluster_commands_helpers.h +++ b/src/mongo/s/cluster_commands_helpers.h @@ -86,7 +86,8 @@ boost::intrusive_ptr<ExpressionContext> makeExpressionContextWithDefaultsForTarg * Dispatches all the specified requests in parallel and waits until all complete, returning a * vector of the same size and positions as that of 'requests'. * - * Throws StaleConfigException if any remote returns a stale shardVersion error. + * Throws StaleConfig if any of the remotes returns that error, regardless of what the other errors + * are. */ std::vector<AsyncRequestsSender::Response> gatherResponses( OperationContext* opCtx, @@ -170,7 +171,7 @@ std::vector<AsyncRequestsSender::Response> scatterGatherUnversionedTargetAllShar * the collection has query sampling enabled and the rate-limited sampler successfully generates a * sample id for it. * - * Does not retry on StaleConfigException. + * Does not retry on StaleConfig errors. */ [[nodiscard]] std::vector<AsyncRequestsSender::Response> scatterGatherVersionedTargetByRoutingTable( OperationContext* opCtx, @@ -191,7 +192,7 @@ std::vector<AsyncRequestsSender::Response> scatterGatherUnversionedTargetAllShar * * Callers can specify shards to skip, even if these shards would be otherwise targeted. * - * Allows StaleConfigException errors to append to the response list. + * Allows StaleConfig errors to append to the response list. */ std::vector<AsyncRequestsSender::Response> scatterGatherVersionedTargetByRoutingTableNoThrowOnStaleShardVersionErrors( @@ -222,7 +223,7 @@ AsyncRequestsSender::Response executeCommandAgainstDatabasePrimary( * Utility for dispatching commands against the shard with the MinKey chunk for the namespace and * attaching the appropriate shard version. * - * Does not retry on StaleConfigException. + * Does not retry on StaleConfig errors. */ AsyncRequestsSender::Response executeCommandAgainstShardWithMinKeyChunk( OperationContext* opCtx, diff --git a/src/mongo/s/commands/strategy.h b/src/mongo/s/commands/strategy.h index 1e04130f657..73916229f7f 100644 --- a/src/mongo/s/commands/strategy.h +++ b/src/mongo/s/commands/strategy.h @@ -41,8 +41,8 @@ public: /** * Executes a command from either OP_QUERY or OP_MSG wire protocols. * - * Catches StaleConfigException errors and retries the command automatically after refreshing - * the metadata for the failing namespace. + * Catches StaleConfig errors and retries the command automatically after refreshing the + * metadata for the failing namespace. */ static Future<DbResponse> clientCommand(std::shared_ptr<RequestExecutionContext> rec); }; diff --git a/src/mongo/s/router.cpp b/src/mongo/s/router_role.cpp index 180c9766bee..ad2e47f2b64 100644 --- a/src/mongo/s/router.cpp +++ b/src/mongo/s/router_role.cpp @@ -28,7 +28,7 @@ */ -#include "mongo/s/router.h" +#include "mongo/s/router_role.h" #include "mongo/logv2/log.h" #include "mongo/s/grid.h" @@ -36,7 +36,6 @@ #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding - namespace mongo { namespace sharding { namespace router { @@ -71,32 +70,34 @@ CachedDatabaseInfo DBPrimaryRouter::_getRoutingInfo(OperationContext* opCtx) con } void DBPrimaryRouter::_onException(RouteContext* context, Status s) { - if (++context->numAttempts > kMaxNumStaleVersionRetries) { - uassertStatusOKWithContext( - s, - str::stream() << "Exceeded maximum number of " << kMaxNumStaleVersionRetries - << " retries attempting \'" << context->comment << "\'"); - } else { - LOGV2_DEBUG(637590, - 3, - "Retrying {description}. Got error: {status}", - "description"_attr = context->comment, - "status"_attr = s); - } - auto catalogCache = Grid::get(_service)->catalogCache(); if (s == ErrorCodes::StaleDbVersion) { auto si = s.extraInfo<StaleDbRoutingVersion>(); - invariant(si); - invariant(si->getDb() == _db, - str::stream() << "StaleDbVersion on unexpected database. Expected " << _db - << ", received " << si->getDb()); + tassert(6375900, "StaleDbVersion must have extraInfo", si); + tassert(6375901, + str::stream() << "StaleDbVersion on unexpected database. Expected " << _db + << ", received " << si->getDb(), + si->getDb() == _db); catalogCache->onStaleDatabaseVersion(si->getDb(), si->getVersionWanted()); } else { uassertStatusOK(s); } + + if (++context->numAttempts > kMaxNumStaleVersionRetries) { + uassertStatusOKWithContext( + s, + str::stream() << "Exceeded maximum number of " << kMaxNumStaleVersionRetries + << " retries attempting \'" << context->comment << "\'"); + } else { + LOGV2_DEBUG(6375902, + 3, + "Retrying database primary routing operation", + "attempt"_attr = context->numAttempts, + "comment"_attr = context->comment, + "status"_attr = s); + } } CollectionRouter::CollectionRouter(ServiceContext* service, NamespaceString nss) @@ -127,42 +128,39 @@ CollectionRoutingInfo CollectionRouter::_getRoutingInfo(OperationContext* opCtx) } void CollectionRouter::_onException(RouteContext* context, Status s) { + auto catalogCache = Grid::get(_service)->catalogCache(); + + if (s == ErrorCodes::StaleDbVersion) { + auto si = s.extraInfo<StaleDbRoutingVersion>(); + tassert(6375903, "StaleDbVersion must have extraInfo", si); + catalogCache->onStaleDatabaseVersion(si->getDb(), si->getVersionWanted()); + } else if (s == ErrorCodes::StaleConfig) { + auto si = s.extraInfo<StaleConfigInfo>(); + tassert(6375904, "StaleConfig must have extraInfo", si); + catalogCache->invalidateShardOrEntireCollectionEntryForShardedCollection( + si->getNss(), si->getVersionWanted(), si->getShardId()); + } else if (s == ErrorCodes::StaleEpoch) { + auto si = s.extraInfo<StaleEpochInfo>(); + tassert(6375905, "StaleEpoch must have extra info", si); + catalogCache->invalidateShardOrEntireCollectionEntryForShardedCollection( + si->getNss(), si->getVersionWanted(), ShardId()); + } else { + uassertStatusOK(s); + } + if (++context->numAttempts > kMaxNumStaleVersionRetries) { uassertStatusOKWithContext( s, str::stream() << "Exceeded maximum number of " << kMaxNumStaleVersionRetries << " retries attempting \'" << context->comment << "\'"); } else { - LOGV2_DEBUG(637591, + LOGV2_DEBUG(6375906, 3, - "Retrying {description}. Got error: {status}", - "description"_attr = context->comment, + "Retrying collection routing operation", + "attempt"_attr = context->numAttempts, + "comment"_attr = context->comment, "status"_attr = s); } - - auto catalogCache = Grid::get(_service)->catalogCache(); - - if (s.isA<ErrorCategory::StaleShardVersionError>()) { - if (auto si = s.extraInfo<StaleConfigInfo>()) { - invariant(si->getNss() == _nss, - str::stream() << "StaleConfig on unexpected namespace. Expected " << _nss - << ", received " << si->getNss()); - catalogCache->invalidateShardOrEntireCollectionEntryForShardedCollection( - _nss, si->getVersionWanted(), si->getShardId()); - } else { - catalogCache->invalidateCollectionEntry_LINEARIZABLE(_nss); - } - } else if (s == ErrorCodes::StaleDbVersion) { - auto si = s.extraInfo<StaleDbRoutingVersion>(); - invariant(si); - invariant(si->getDb() == _nss.db(), - str::stream() << "StaleDbVersion on unexpected database. Expected " << _nss.db() - << ", received " << si->getDb()); - - catalogCache->onStaleDatabaseVersion(si->getDb(), si->getVersionWanted()); - } else { - uassertStatusOK(s); - } } } // namespace router diff --git a/src/mongo/s/router.h b/src/mongo/s/router_role.h index 2004f377aa8..2004f377aa8 100644 --- a/src/mongo/s/router.h +++ b/src/mongo/s/router_role.h diff --git a/src/mongo/s/stale_exception.cpp b/src/mongo/s/stale_exception.cpp index c9e66b5ee14..7c2e0531054 100644 --- a/src/mongo/s/stale_exception.cpp +++ b/src/mongo/s/stale_exception.cpp @@ -69,10 +69,31 @@ std::shared_ptr<const ErrorExtraInfo> StaleConfigInfo::parse(const BSONObj& obj) void StaleEpochInfo::serialize(BSONObjBuilder* bob) const { bob->append("ns", _nss.ns()); + if (_received) + _received->serialize("vReceived", bob); + if (_wanted) + _wanted->serialize("vWanted", bob); } std::shared_ptr<const ErrorExtraInfo> StaleEpochInfo::parse(const BSONObj& obj) { - return std::make_shared<StaleEpochInfo>(NamespaceString(obj["ns"].String())); + boost::optional<ShardVersion> received; + if (auto vReceivedElem = obj["vReceived"]) + received = ShardVersion::parse(vReceivedElem); + + boost::optional<ShardVersion> wanted; + if (auto vWantedElem = obj["vWanted"]) + wanted = ShardVersion::parse(vWantedElem); + + uassert(6375907, + str::stream() << "Either both vReceived (" << received << ")" + << " and vWanted (" << wanted << ") must be present or none", + received.is_initialized() == wanted.is_initialized()); + + if (received) + return std::make_shared<StaleEpochInfo>( + NamespaceString(obj["ns"].String()), *received, *wanted); + else + return std::make_shared<StaleEpochInfo>(NamespaceString(obj["ns"].String())); } void StaleDbRoutingVersion::serialize(BSONObjBuilder* bob) const { diff --git a/src/mongo/s/stale_exception.h b/src/mongo/s/stale_exception.h index da34725af68..3da30934803 100644 --- a/src/mongo/s/stale_exception.h +++ b/src/mongo/s/stale_exception.h @@ -82,7 +82,7 @@ public: void serialize(BSONObjBuilder* bob) const; static std::shared_ptr<const ErrorExtraInfo> parse(const BSONObj& obj); -protected: +private: NamespaceString _nss; ShardVersion _received; boost::optional<ShardVersion> _wanted; @@ -93,24 +93,41 @@ protected: boost::optional<OperationType> _duringOperationType; }; +// TODO (SERVER-74380): Rename the StaleEpoch code to StaleDownstreamRouter and the info to +// StaleDownstreamRouterInfo class StaleEpochInfo final : public ErrorExtraInfo { public: static constexpr auto code = ErrorCodes::StaleEpoch; + StaleEpochInfo(NamespaceString nss, ShardVersion received, ShardVersion wanted) + : _nss(std::move(nss)), _received(received), _wanted(wanted) {} + + // TODO (SERVER-74380): Remove this constructor StaleEpochInfo(NamespaceString nss) : _nss(std::move(nss)) {} const auto& getNss() const { return _nss; } + const auto& getVersionReceived() const { + return _received; + } + + const auto& getVersionWanted() const { + return _wanted; + } + void serialize(BSONObjBuilder* bob) const; static std::shared_ptr<const ErrorExtraInfo> parse(const BSONObj& obj); private: NamespaceString _nss; -}; -using StaleConfigException = ExceptionFor<ErrorCodes::StaleConfig>; + // TODO (SERVER-74380): These two fields are boost::optional for backwards compatibility. Either + // both of them are boost::none or both are set. + boost::optional<ShardVersion> _received; + boost::optional<ShardVersion> _wanted; +}; class StaleDbRoutingVersion final : public ErrorExtraInfo { public: diff --git a/src/mongo/s/stale_exception_test.cpp b/src/mongo/s/stale_exception_test.cpp index 1e92988516d..a91f52edd8f 100644 --- a/src/mongo/s/stale_exception_test.cpp +++ b/src/mongo/s/stale_exception_test.cpp @@ -54,7 +54,7 @@ TEST(StaleExceptionTest, StaleConfigInfoSerializationTest) { ASSERT_EQUALS(deserializedInfo->getShardId(), kShardId); } -TEST(StaleExceptionTest, StaleEpochInfoSerializationTest) { +TEST(StaleExceptionTest, StaleEpochInfoLegacySerializationTest) { StaleEpochInfo info(kNss); // Serialize @@ -66,6 +66,24 @@ TEST(StaleExceptionTest, StaleEpochInfoSerializationTest) { std::static_pointer_cast<const StaleEpochInfo>(StaleEpochInfo::parse(bob.obj())); ASSERT_EQUALS(deserializedInfo->getNss(), kNss); + ASSERT(!deserializedInfo->getVersionReceived()); + ASSERT(!deserializedInfo->getVersionWanted()); +} + +TEST(StaleExceptionTest, StaleEpochInfoSerializationTest) { + StaleEpochInfo info(kNss, ShardVersion::UNSHARDED(), ShardVersion::UNSHARDED()); + + // Serialize + BSONObjBuilder bob; + info.serialize(&bob); + + // Deserialize + auto deserializedInfo = + std::static_pointer_cast<const StaleEpochInfo>(StaleEpochInfo::parse(bob.obj())); + + ASSERT_EQ(deserializedInfo->getNss(), kNss); + ASSERT_EQ(deserializedInfo->getVersionReceived(), ShardVersion::UNSHARDED()); + ASSERT_EQ(deserializedInfo->getVersionWanted(), ShardVersion::UNSHARDED()); } } // namespace |