author | Kaloian Manassiev <kaloian.manassiev@mongodb.com> | 2017-12-05 13:43:52 -0500
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-01-25 13:11:41 +0000
commit | a414e4ceafb45dc6ebf4daeb9198f0a7f3fb189c (patch)
tree | b24bc9f81262ef81afe509a8f5d214545bcdbdbc /src
parent | fe718c5644621df759085ee2bd55ec96252816cb (diff)
download | mongo-a414e4ceafb45dc6ebf4daeb9198f0a7f3fb189c.tar.gz
SERVER-45599 Backport of SERVER-32198: Split CollectionShardingState::getMetadata into three methods
o getCurrentMetadataIfKnown - which returns the current filtering
metadata if any is available
o getMetadataForOperation - which returns the metadata required
by the current operation, based on the OperationShardingState
o getCurrentMetadata - which returns the currently available filtering
metadata (or UNSHARDED if not known)
This is in preparation for making
getMetadataForOperation/getCurrentMetadata throw a
StaleShardVersion exception if the metadata has not been loaded yet.
This is a partial cherry-pick from dcf7e0dd89d34f58b592f1adb3d41e5edd6e2012, adapted for the 4.0 branch.
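For readers skimming the hunks below, here is a minimal sketch of the accessor split as it lands in src/mongo/db/s/collection_sharding_state.h in this patch. The signatures are taken from the header diff; the MongoDB-internal types (OperationContext, ScopedCollectionMetadata, ChunkVersion) are referenced by name only, and the comments paraphrase the commit message rather than quoting the header's own documentation.

```cpp
// Abridged sketch of the split interface; not a compilable excerpt.
class CollectionShardingState {
public:
    // Filtering metadata that the current operation must use, derived from the shard
    // version attached to the operation (OperationShardingState). Operations that carry
    // no shard version receive UNSHARDED metadata.
    ScopedCollectionMetadata getMetadataForOperation(OperationContext* opCtx);

    // The most recently refreshed filtering metadata, or UNSHARDED if none is known.
    ScopedCollectionMetadata getCurrentMetadata();

    // boost::none if the filtering metadata has not been loaded yet; otherwise the latest
    // refreshed metadata/shard version. These do not check the version the operation
    // requires and are intended for checks such as whether a config server update has
    // taken effect.
    boost::optional<ScopedCollectionMetadata> getCurrentMetadataIfKnown();
    boost::optional<ChunkVersion> getCurrentShardVersionIfKnown();
};
```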
Diffstat (limited to 'src')
29 files changed, 513 insertions, 442 deletions
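Before the per-file hunks, a hedged before/after view of a typical call-site migration in this patch, adapted from the count_cmd.cpp and rename_collection.cpp hunks further down (surrounding code elided):

```cpp
// Before: a single accessor, implicitly tied to the operation's shard version.
auto rangePreserver = CollectionShardingState::get(opCtx, nss)->getMetadata(opCtx);

// After: versioned read paths ask explicitly for the operation's metadata...
auto rangePreserver =
    CollectionShardingState::get(opCtx, nss)->getMetadataForOperation(opCtx);

// ...while unversioned checks (e.g. whether the source of a rename is sharded) use the
// currently known metadata and no longer pass the OperationContext.
auto* const css = CollectionShardingState::get(opCtx, source);
if (css->getCurrentMetadata()->isSharded())
    return {ErrorCodes::IllegalOperation, "source namespace cannot be sharded"};
```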
diff --git a/src/mongo/base/global_initializer_registerer.cpp b/src/mongo/base/global_initializer_registerer.cpp index 7c458470649..67b83dfd6a1 100644 --- a/src/mongo/base/global_initializer_registerer.cpp +++ b/src/mongo/base/global_initializer_registerer.cpp @@ -50,7 +50,6 @@ GlobalInitializerRegisterer::GlobalInitializerRegisterer(std::string name, std::move(prerequisites), std::move(dependents)); - if (Status::OK() != status) { std::cerr << "Attempt to add global initializer failed, status: " << status << std::endl; ::abort(); diff --git a/src/mongo/db/catalog/create_collection.cpp b/src/mongo/db/catalog/create_collection.cpp index da4ecaf40ab..a9bf373f38a 100644 --- a/src/mongo/db/catalog/create_collection.cpp +++ b/src/mongo/db/catalog/create_collection.cpp @@ -89,6 +89,14 @@ Status createCollection(OperationContext* opCtx, !options["capped"].trueValue() || options["size"].isNumber() || options.hasField("$nExtents")); + CollectionOptions collectionOptions; + { + Status status = collectionOptions.parse(options, kind); + if (!status.isOK()) { + return status; + } + } + return writeConflictRetry(opCtx, "create", nss.ns(), [&] { Lock::DBLock dbXLock(opCtx, nss.db(), MODE_X); const bool shardVersionCheck = true; @@ -99,12 +107,6 @@ Status createCollection(OperationContext* opCtx, str::stream() << "Not primary while creating collection " << nss.ns()); } - CollectionOptions collectionOptions; - Status status = collectionOptions.parse(options, kind); - if (!status.isOK()) { - return status; - } - if (collectionOptions.isView()) { // If the `system.views` collection does not exist, create it in a separate // WriteUnitOfWork. @@ -117,9 +119,8 @@ Status createCollection(OperationContext* opCtx, // Create collection. const bool createDefaultIndexes = true; - status = Database::userCreateNS( + Status status = Database::userCreateNS( opCtx, ctx.db(), nss.ns(), collectionOptions, createDefaultIndexes, idIndex); - if (!status.isOK()) { return status; } diff --git a/src/mongo/db/catalog/rename_collection.cpp b/src/mongo/db/catalog/rename_collection.cpp index d300df28d52..6aadfe754b4 100644 --- a/src/mongo/db/catalog/rename_collection.cpp +++ b/src/mongo/db/catalog/rename_collection.cpp @@ -171,10 +171,10 @@ Status renameCollectionCommon(OperationContext* opCtx, // Make sure the source collection is not sharded. { - auto const css = CollectionShardingState::get(opCtx, source); - if (css->getMetadata(opCtx)->isSharded()) { + auto* const css = CollectionShardingState::get(opCtx, source); + const auto metadata = css->getCurrentMetadata(); + if (metadata->isSharded()) return {ErrorCodes::IllegalOperation, "source namespace cannot be sharded"}; - } } // Disallow renaming from a replicated to an unreplicated collection or vice versa. @@ -211,10 +211,10 @@ Status renameCollectionCommon(OperationContext* opCtx, } { - auto const css = CollectionShardingState::get(opCtx, target); - if (css->getMetadata(opCtx)->isSharded()) { + auto* const css = CollectionShardingState::get(opCtx, target); + const auto metadata = css->getCurrentMetadata(); + if (metadata->isSharded()) return {ErrorCodes::IllegalOperation, "cannot rename to a sharded collection"}; - } } // RenameCollectionForCommand cannot drop target by renaming. 
diff --git a/src/mongo/db/commands/SConscript b/src/mongo/db/commands/SConscript index b0717ce99c7..0c419beeb36 100644 --- a/src/mongo/db/commands/SConscript +++ b/src/mongo/db/commands/SConscript @@ -347,7 +347,7 @@ env.Library( '$BUILD_DIR/mongo/db/repl/oplog', '$BUILD_DIR/mongo/db/repl/repl_coordinator_interface', '$BUILD_DIR/mongo/db/rw_concern_d', - '$BUILD_DIR/mongo/db/s/sharding_catalog_manager', + '$BUILD_DIR/mongo/db/s/sharding_runtime_d', '$BUILD_DIR/mongo/s/sharding_legacy_api', '$BUILD_DIR/mongo/util/net/ssl_manager', 'core', diff --git a/src/mongo/db/commands/count_cmd.cpp b/src/mongo/db/commands/count_cmd.cpp index cd2891f6266..50cd6752024 100644 --- a/src/mongo/db/commands/count_cmd.cpp +++ b/src/mongo/db/commands/count_cmd.cpp @@ -152,7 +152,8 @@ public: // Prevent chunks from being cleaned up during yields - this allows us to only check the // version on initial entry into count. - auto rangePreserver = CollectionShardingState::get(opCtx, nss)->getMetadata(opCtx); + auto rangePreserver = + CollectionShardingState::get(opCtx, nss)->getMetadataForOperation(opCtx); auto statusWithPlanExecutor = getExecutorCount(opCtx, collection, request.getValue(), true /*explain*/); @@ -205,7 +206,8 @@ public: // Prevent chunks from being cleaned up during yields - this allows us to only check the // version on initial entry into count. - auto rangePreserver = CollectionShardingState::get(opCtx, nss)->getMetadata(opCtx); + auto rangePreserver = + CollectionShardingState::get(opCtx, nss)->getMetadataForOperation(opCtx); auto statusWithPlanExecutor = getExecutorCount(opCtx, collection, request.getValue(), false /*explain*/); diff --git a/src/mongo/db/commands/create_indexes.cpp b/src/mongo/db/commands/create_indexes.cpp index 167d5f3161b..fd71731c664 100644 --- a/src/mongo/db/commands/create_indexes.cpp +++ b/src/mongo/db/commands/create_indexes.cpp @@ -234,8 +234,6 @@ std::vector<BSONObj> resolveDefaultsAndRemoveExistingIndexes(OperationContext* o return specs; } -} // namespace - /** * { createIndexes : "bar", indexes : [ { ns : "test.bar", key : { x : 1 }, name: "x_1" } ] } */ @@ -243,16 +241,17 @@ class CmdCreateIndex : public ErrmsgCommandDeprecated { public: CmdCreateIndex() : ErrmsgCommandDeprecated(kCommandName) {} - virtual bool supportsWriteConcern(const BSONObj& cmd) const override { + bool supportsWriteConcern(const BSONObj& cmd) const override { return true; } + AllowedOnSecondary secondaryAllowed(ServiceContext*) const override { return AllowedOnSecondary::kNever; } - virtual Status checkAuthForCommand(Client* client, - const std::string& dbname, - const BSONObj& cmdObj) const { + Status checkAuthForCommand(Client* client, + const std::string& dbname, + const BSONObj& cmdObj) const override { ActionSet actions; actions.addAction(ActionType::createIndex); Privilege p(parseResourcePattern(dbname, cmdObj), actions); @@ -261,26 +260,22 @@ public: return Status(ErrorCodes::Unauthorized, "Unauthorized"); } - virtual bool errmsgRun(OperationContext* opCtx, - const string& dbname, - const BSONObj& cmdObj, - string& errmsg, - BSONObjBuilder& result) { + bool errmsgRun(OperationContext* opCtx, + const string& dbname, + const BSONObj& cmdObj, + string& errmsg, + BSONObjBuilder& result) override { const NamespaceString ns(CommandHelpers::parseNsCollectionRequired(dbname, cmdObj)); + uassertStatusOK(userAllowedWriteNS(ns)); - Status status = userAllowedWriteNS(ns); - uassertStatusOK(status); - - // Disallow users from creating new indexes on config.transactions since the sessions - // code 
was optimized to not update indexes. + // Disallow users from creating new indexes on config.transactions since the sessions code + // was optimized to not update indexes uassert(ErrorCodes::IllegalOperation, str::stream() << "not allowed to create index on " << ns.ns(), ns != NamespaceString::kSessionTransactionsTableNamespace); - auto specsWithStatus = - parseAndValidateIndexSpecs(opCtx, ns, cmdObj, serverGlobalParams.featureCompatibility); - uassertStatusOK(specsWithStatus.getStatus()); - auto specs = std::move(specsWithStatus.getValue()); + auto specs = uassertStatusOK( + parseAndValidateIndexSpecs(opCtx, ns, cmdObj, serverGlobalParams.featureCompatibility)); // Do not use AutoGetOrCreateDb because we may relock the database in mode X. Lock::DBLock dbLock(opCtx, ns.db(), MODE_IX); @@ -335,8 +330,7 @@ public: uasserted(ErrorCodes::CommandNotSupportedOnView, errmsg); } - status = userAllowedCreateNS(ns.db(), ns.coll()); - uassertStatusOK(status); + uassertStatusOK(userAllowedCreateNS(ns.db(), ns.coll())); writeConflictRetry(opCtx, kCommandName, ns.ns(), [&] { WriteUnitOfWork wunit(opCtx); @@ -368,8 +362,7 @@ public: for (size_t i = 0; i < specs.size(); i++) { const BSONObj& spec = specs[i]; if (spec["unique"].trueValue()) { - status = checkUniqueIndexConstraints(opCtx, ns, spec["key"].Obj()); - uassertStatusOK(status); + _checkUniqueIndexConstraints(opCtx, ns, spec["key"].Obj()); } } @@ -435,24 +428,24 @@ public: } private: - static Status checkUniqueIndexConstraints(OperationContext* opCtx, - const NamespaceString& nss, - const BSONObj& newIdxKey) { + static void _checkUniqueIndexConstraints(OperationContext* opCtx, + const NamespaceString& nss, + const BSONObj& newIdxKey) { invariant(opCtx->lockState()->isCollectionLockedForMode(nss.ns(), MODE_X)); - auto metadata = CollectionShardingState::get(opCtx, nss)->getMetadata(opCtx); - if (metadata->isSharded()) { - ShardKeyPattern shardKeyPattern(metadata->getKeyPattern()); - if (!shardKeyPattern.isUniqueIndexCompatible(newIdxKey)) { - return Status(ErrorCodes::CannotCreateIndex, - str::stream() << "cannot create unique index over " << newIdxKey - << " with shard key pattern " - << shardKeyPattern.toBSON()); - } - } - + const auto metadata = CollectionShardingState::get(opCtx, nss)->getCurrentMetadata(); + if (!metadata->isSharded()) + return; - return Status::OK(); + const ShardKeyPattern shardKeyPattern(metadata->getKeyPattern()); + uassert(ErrorCodes::CannotCreateIndex, + str::stream() << "cannot create unique index over " << newIdxKey + << " with shard key pattern " + << shardKeyPattern.toBSON(), + shardKeyPattern.isUniqueIndexCompatible(newIdxKey)); } + } cmdCreateIndex; -} + +} // namespace +} // namespace mongo diff --git a/src/mongo/db/commands/geo_near_cmd.cpp b/src/mongo/db/commands/geo_near_cmd.cpp index e014a13e02c..336ce83ebba 100644 --- a/src/mongo/db/commands/geo_near_cmd.cpp +++ b/src/mongo/db/commands/geo_near_cmd.cpp @@ -231,7 +231,8 @@ public: // Prevent chunks from being cleaned up during yields - this allows us to only check the // version on initial entry into geoNear. 
- auto rangePreserver = CollectionShardingState::get(opCtx, nss)->getMetadata(opCtx); + auto rangePreserver = + CollectionShardingState::get(opCtx, nss)->getMetadataForOperation(opCtx); const auto& readConcernArgs = repl::ReadConcernArgs::get(opCtx); const PlanExecutor::YieldPolicy yieldPolicy = diff --git a/src/mongo/db/commands/mr.cpp b/src/mongo/db/commands/mr.cpp index 3637948a1e7..4a23c78b7fb 100644 --- a/src/mongo/db/commands/mr.cpp +++ b/src/mongo/db/commands/mr.cpp @@ -62,8 +62,7 @@ #include "mongo/db/query/plan_summary_stats.h" #include "mongo/db/query/query_planner.h" #include "mongo/db/repl/replication_coordinator.h" -#include "mongo/db/s/collection_metadata.h" -#include "mongo/db/s/collection_sharding_state.h" +#include "mongo/db/s/collection_sharding_runtime.h" #include "mongo/db/s/sharding_state.h" #include "mongo/db/server_options.h" #include "mongo/db/service_context.h" @@ -454,6 +453,9 @@ void State::prepTempCollection() { << status.code()); } wuow.commit(); + + CollectionShardingRuntime::get(_opCtx, _config.incLong) + ->setFilteringMetadata(_opCtx, CollectionMetadata()); }); } @@ -503,6 +505,7 @@ void State::prepTempCollection() { CollectionOptions options = finalOptions; options.temp = true; + // If a UUID for the final output collection was sent by mongos (i.e., the final output // collection is sharded), use the UUID mongos sent when creating the temp collection. // When the temp collection is renamed to the final output collection, the UUID will be @@ -534,6 +537,9 @@ void State::prepTempCollection() { _opCtx, _config.tempNamespace, uuid, *it, false); } wuow.commit(); + + CollectionShardingRuntime::get(_opCtx, _config.tempNamespace) + ->setFilteringMetadata(_opCtx, CollectionMetadata()); }); } @@ -1426,12 +1432,9 @@ public: uassert(16149, "cannot run map reduce without the js engine", getGlobalScriptEngine()); - // Prevent sharding state from changing during the MR. 
- const auto collMetadata = [&] { - // Get metadata before we check our version, to make sure it doesn't increment in the - // meantime + const auto metadata = [&] { AutoGetCollectionForReadCommand autoColl(opCtx, config.nss); - return CollectionShardingState::get(opCtx, config.nss)->getMetadata(opCtx); + return CollectionShardingState::get(opCtx, config.nss)->getMetadataForOperation(opCtx); }(); bool shouldHaveData = false; @@ -1499,17 +1502,13 @@ public: const ExtensionsCallbackReal extensionsCallback(opCtx, &config.nss); const boost::intrusive_ptr<ExpressionContext> expCtx; - auto statusWithCQ = + auto cq = uassertStatusOKWithContext( CanonicalQuery::canonicalize(opCtx, std::move(qr), expCtx, extensionsCallback, - MatchExpressionParser::kAllowAllSpecialFeatures); - if (!statusWithCQ.isOK()) { - uasserted(17238, "Can't canonicalize query " + config.filter.toString()); - return 0; - } - std::unique_ptr<CanonicalQuery> cq = std::move(statusWithCQ.getValue()); + MatchExpressionParser::kAllowAllSpecialFeatures), + str::stream() << "Can't canonicalize query " << config.filter); unique_ptr<PlanExecutor, PlanExecutor::Deleter> exec; { @@ -1536,38 +1535,37 @@ public: Timer mt; - // go through each doc BSONObj o; PlanExecutor::ExecState execState; while (PlanExecutor::ADVANCED == (execState = exec->getNext(&o, NULL))) { - o = o.getOwned(); // we will be accessing outside of the lock - // check to see if this is a new object we don't own yet - // because of a chunk migration - if (collMetadata->isSharded()) { - ShardKeyPattern kp(collMetadata->getKeyPattern()); - if (!collMetadata->keyBelongsToMe(kp.extractShardKeyFromDoc(o))) { + o = o.getOwned(); // The object will be accessed outside of collection lock + + // Check to see if this is a new object we don't own yet because of a chunk + // migration + if (metadata->isSharded()) { + ShardKeyPattern kp(metadata->getKeyPattern()); + if (!metadata->keyBelongsToMe(kp.extractShardKeyFromDoc(o))) { continue; } } - // do map if (config.verbose) mt.reset(); + config.mapper->map(o); + if (config.verbose) mapTime += mt.micros(); - // Check if the state accumulated so far needs to be written to a - // collection. This may yield the DB lock temporarily and then - // acquire it again. - // + // Check if the state accumulated so far needs to be written to a collection. + // This may yield the DB lock temporarily and then acquire it again. numInputs++; if (numInputs % 100 == 0) { Timer t; - // TODO: As an optimization, we might want to do the save/restore - // state and yield inside the reduceAndSpillInMemoryState method, so - // it only happens if necessary. + // TODO: As an optimization, we might want to do the save/restore state and + // yield inside the reduceAndSpillInMemoryState method, so it only happens + // if necessary. 
exec->saveState(); scopedAutoDb.reset(); diff --git a/src/mongo/db/exec/update.cpp b/src/mongo/db/exec/update.cpp index 559673940db..eb568bfb8e4 100644 --- a/src/mongo/db/exec/update.cpp +++ b/src/mongo/db/exec/update.cpp @@ -135,7 +135,7 @@ bool shouldRestartUpdateIfNoLongerMatches(const UpdateStageParams& params) { const std::vector<std::unique_ptr<FieldRef>>* getImmutableFields(OperationContext* opCtx, const NamespaceString& ns) { - auto metadata = CollectionShardingState::get(opCtx, ns)->getMetadata(opCtx); + auto metadata = CollectionShardingState::get(opCtx, ns)->getCurrentMetadata(); if (metadata->isSharded()) { const std::vector<std::unique_ptr<FieldRef>>& fields = metadata->getKeyPatternFields(); // Return shard-keys as immutable for the update system. @@ -288,13 +288,12 @@ BSONObj UpdateStage::transformAndUpdate(const Snapshotted<BSONObj>& oldObj, Reco RecordId newRecordId; OplogUpdateEntryArgs args; if (!request->isExplain()) { - invariant(_collection); - auto* css = CollectionShardingState::get(getOpCtx(), _collection->ns()); + auto* const css = CollectionShardingState::get(getOpCtx(), _collection->ns()); args.nss = _collection->ns(); args.uuid = _collection->uuid(); args.stmtId = request->getStmtId(); args.update = logObj; - auto metadata = css->getMetadata(getOpCtx()); + auto metadata = css->getCurrentMetadata(); args.criteria = metadata->extractDocumentKey(newObj); uassert(16980, "Multi-update operations require all documents to have an '_id' field", diff --git a/src/mongo/db/op_observer_impl.cpp b/src/mongo/db/op_observer_impl.cpp index 35cfdb77237..15abf920fa6 100644 --- a/src/mongo/db/op_observer_impl.cpp +++ b/src/mongo/db/op_observer_impl.cpp @@ -311,7 +311,7 @@ OpTimeBundle replLogApplyOps(OperationContext* opCtx, BSONObj OpObserverImpl::getDocumentKey(OperationContext* opCtx, NamespaceString const& nss, BSONObj const& doc) { - auto metadata = CollectionShardingState::get(opCtx, nss)->getMetadata(opCtx); + auto metadata = CollectionShardingState::get(opCtx, nss)->getMetadataForOperation(opCtx); return metadata->extractDocumentKey(doc).getOwned(); } diff --git a/src/mongo/db/pipeline/pipeline_d.cpp b/src/mongo/db/pipeline/pipeline_d.cpp index 0661430956f..9523261eb8a 100644 --- a/src/mongo/db/pipeline/pipeline_d.cpp +++ b/src/mongo/db/pipeline/pipeline_d.cpp @@ -157,7 +157,7 @@ StatusWith<unique_ptr<PlanExecutor, PlanExecutor::Deleter>> createRandomCursorEx if (ShardingState::get(opCtx)->needCollectionMetadata(opCtx, collection->ns().ns())) { auto shardFilterStage = stdx::make_unique<ShardFilterStage>( opCtx, - CollectionShardingState::get(opCtx, collection->ns())->getMetadata(opCtx), + CollectionShardingState::get(opCtx, collection->ns())->getMetadataForOperation(opCtx), ws.get(), stage.release()); return PlanExecutor::make(opCtx, @@ -610,7 +610,7 @@ DBClientBase* PipelineD::MongoDInterface::directClient() { bool PipelineD::MongoDInterface::isSharded(OperationContext* opCtx, const NamespaceString& nss) { AutoGetCollectionForReadCommand autoColl(opCtx, nss); auto const css = CollectionShardingState::get(opCtx, nss); - return css->getMetadata(opCtx)->isSharded(); + return css->getCurrentMetadata()->isSharded(); } BSONObj PipelineD::MongoDInterface::insert(const boost::intrusive_ptr<ExpressionContext>& expCtx, @@ -752,7 +752,7 @@ Status PipelineD::MongoDInterface::attachCursorSourceToPipeline( auto css = CollectionShardingState::get(expCtx->opCtx, expCtx->ns); uassert(4567, str::stream() << "from collection (" << expCtx->ns.ns() << ") cannot be sharded", - 
!css->getMetadata(expCtx->opCtx)->isSharded()); + !css->getCurrentMetadata()->isSharded()); PipelineD::prepareCursorSource(autoColl->getCollection(), expCtx->ns, nullptr, pipeline); @@ -800,7 +800,7 @@ std::pair<std::vector<FieldPath>, bool> PipelineD::MongoDInterface::collectDocum auto scm = [opCtx, &nss]() -> ScopedCollectionMetadata { AutoGetCollection autoColl(opCtx, nss, MODE_IS); - return CollectionShardingState::get(opCtx, nss)->getMetadata(opCtx); + return CollectionShardingState::get(opCtx, nss)->getCurrentMetadata(); }(); // Collection is not sharded or UUID mismatch implies collection has been dropped and recreated diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp index 4f7f4477081..defba5f3c69 100644 --- a/src/mongo/db/query/get_executor.cpp +++ b/src/mongo/db/query/get_executor.cpp @@ -178,8 +178,8 @@ void fillOutPlannerParams(OperationContext* opCtx, // If the caller wants a shard filter, make sure we're actually sharded. if (plannerParams->options & QueryPlannerParams::INCLUDE_SHARD_FILTER) { - auto collMetadata = - CollectionShardingState::get(opCtx, canonicalQuery->nss())->getMetadata(opCtx); + auto collMetadata = CollectionShardingState::get(opCtx, canonicalQuery->nss()) + ->getMetadataForOperation(opCtx); if (collMetadata->isSharded()) { plannerParams->shardKey = collMetadata->getKeyPattern(); } else { @@ -313,7 +313,8 @@ StatusWith<PrepareExecutionResult> prepareExecution(OperationContext* opCtx, if (plannerParams.options & QueryPlannerParams::INCLUDE_SHARD_FILTER) { root = make_unique<ShardFilterStage>( opCtx, - CollectionShardingState::get(opCtx, canonicalQuery->nss())->getMetadata(opCtx), + CollectionShardingState::get(opCtx, canonicalQuery->nss()) + ->getMetadataForOperation(opCtx), ws, root.release()); } diff --git a/src/mongo/db/query/stage_builder.cpp b/src/mongo/db/query/stage_builder.cpp index c23997773dd..c1390d0d18c 100644 --- a/src/mongo/db/query/stage_builder.cpp +++ b/src/mongo/db/query/stage_builder.cpp @@ -302,11 +302,11 @@ PlanStage* buildStages(OperationContext* opCtx, if (nullptr == childStage) { return nullptr; } - return new ShardFilterStage( - opCtx, - CollectionShardingState::get(opCtx, collection->ns())->getMetadata(opCtx), - ws, - childStage); + return new ShardFilterStage(opCtx, + CollectionShardingState::get(opCtx, collection->ns()) + ->getMetadataForOperation(opCtx), + ws, + childStage); } case STAGE_KEEP_MUTATIONS: { const KeepMutationsNode* km = static_cast<const KeepMutationsNode*>(root); diff --git a/src/mongo/db/s/cleanup_orphaned_cmd.cpp b/src/mongo/db/s/cleanup_orphaned_cmd.cpp index 3cf9ec3728d..8bb1c5546e0 100644 --- a/src/mongo/db/s/cleanup_orphaned_cmd.cpp +++ b/src/mongo/db/s/cleanup_orphaned_cmd.cpp @@ -81,11 +81,16 @@ CleanupResult cleanupOrphanedData(OperationContext* opCtx, { AutoGetCollection autoColl(opCtx, ns, MODE_IX); auto* const css = CollectionShardingRuntime::get(opCtx, ns); + const auto optMetadata = css->getCurrentMetadataIfKnown(); + uassert(ErrorCodes::ConflictingOperationInProgress, + str::stream() << "Unable to establish sharding status for collection " << ns.ns(), + optMetadata); + + const auto& metadata = *optMetadata; - auto metadata = css->getMetadata(opCtx); if (!metadata->isSharded()) { - log() << "skipping orphaned data cleanup for " << ns.toString() - << ", collection is not sharded"; + LOG(0) << "skipping orphaned data cleanup for " << ns.ns() + << ", collection is not sharded"; return CleanupResult_Done; } diff --git 
a/src/mongo/db/s/collection_metadata_filtering_test.cpp b/src/mongo/db/s/collection_metadata_filtering_test.cpp index 7f777dfa179..46db156c36a 100644 --- a/src/mongo/db/s/collection_metadata_filtering_test.cpp +++ b/src/mongo/db/s/collection_metadata_filtering_test.cpp @@ -32,6 +32,7 @@ #include "mongo/db/catalog_raii.h" #include "mongo/db/s/collection_sharding_runtime.h" +#include "mongo/db/s/operation_sharding_state.h" #include "mongo/s/catalog/type_chunk.h" #include "mongo/s/shard_server_test_fixture.h" @@ -107,6 +108,12 @@ protected: } _manager->setFilteringMetadata(CollectionMetadata(cm, ShardId("0"))); + + auto& oss = OperationShardingState::get(operationContext()); + const auto version = cm->getVersion(ShardId("0")); + BSONObjBuilder builder; + version.appendToCommand(&builder); + oss.initializeClientRoutingVersions(kNss, builder.obj()); } std::shared_ptr<MetadataManager> _manager; @@ -134,7 +141,7 @@ TEST_F(CollectionMetadataFilteringTest, FilterDocumentsInTheFuture) { AutoGetCollection autoColl(operationContext(), kNss, MODE_IS); auto* const css = CollectionShardingState::get(operationContext(), kNss); - testFn(css->getMetadata(operationContext())); + testFn(css->getMetadataForOperation(operationContext())); } { @@ -165,7 +172,7 @@ TEST_F(CollectionMetadataFilteringTest, FilterDocumentsInThePast) { AutoGetCollection autoColl(operationContext(), kNss, MODE_IS); auto* const css = CollectionShardingState::get(operationContext(), kNss); - testFn(css->getMetadata(operationContext())); + testFn(css->getMetadataForOperation(operationContext())); } { @@ -204,7 +211,7 @@ TEST_F(CollectionMetadataFilteringTest, FilterDocumentsTooFarInThePastThrowsStal AutoGetCollection autoColl(operationContext(), kNss, MODE_IS); auto* const css = CollectionShardingState::get(operationContext(), kNss); - testFn(css->getMetadata(operationContext())); + testFn(css->getMetadataForOperation(operationContext())); } { diff --git a/src/mongo/db/s/collection_sharding_runtime.cpp b/src/mongo/db/s/collection_sharding_runtime.cpp index f6f92850408..6fc514ca2a7 100644 --- a/src/mongo/db/s/collection_sharding_runtime.cpp +++ b/src/mongo/db/s/collection_sharding_runtime.cpp @@ -45,6 +45,30 @@ namespace { // How long to wait before starting cleanup of an emigrated chunk range MONGO_EXPORT_SERVER_PARAMETER(orphanCleanupDelaySecs, int, 900); // 900s = 15m +/** + * Returns whether the specified namespace is used for sharding-internal purposes only and can never + * be marked as anything other than UNSHARDED, because the call sites which reference these + * collections are not prepared to handle StaleConfig errors. 
+ */ +bool isNamespaceAlwaysUnsharded(const NamespaceString& nss) { + // There should never be a case to mark as sharded collections which are on the config server + if (serverGlobalParams.clusterRole != ClusterRole::ShardServer) + return true; + + // Local and admin never have sharded collections + if (nss.db() == NamespaceString::kLocalDb || nss.db() == NamespaceString::kAdminDb) + return true; + + // Certain config collections can never be sharded + if (nss == NamespaceString::kSessionTransactionsTableNamespace) + return true; + + if (nss.isSystemDotProfile()) + return true; + + return false; +} + } // namespace CollectionShardingRuntime::CollectionShardingRuntime(ServiceContext* sc, @@ -52,7 +76,11 @@ CollectionShardingRuntime::CollectionShardingRuntime(ServiceContext* sc, executor::TaskExecutor* rangeDeleterExecutor) : CollectionShardingState(nss), _nss(std::move(nss)), - _metadataManager(std::make_shared<MetadataManager>(sc, _nss, rangeDeleterExecutor)) {} + _metadataManager(std::make_shared<MetadataManager>(sc, _nss, rangeDeleterExecutor)) { + if (isNamespaceAlwaysUnsharded(_nss)) { + _metadataManager->setFilteringMetadata(CollectionMetadata()); + } +} CollectionShardingRuntime* CollectionShardingRuntime::get(OperationContext* opCtx, const NamespaceString& nss) { @@ -62,13 +90,17 @@ CollectionShardingRuntime* CollectionShardingRuntime::get(OperationContext* opCt void CollectionShardingRuntime::setFilteringMetadata(OperationContext* opCtx, CollectionMetadata newMetadata) { + invariant(!newMetadata.isSharded() || !isNamespaceAlwaysUnsharded(_nss), + str::stream() << "Namespace " << _nss.ns() << " must never be sharded."); invariant(opCtx->lockState()->isCollectionLockedForMode(_nss.ns(), MODE_X)); _metadataManager->setFilteringMetadata(std::move(newMetadata)); } void CollectionShardingRuntime::clearFilteringMetadata() { - _metadataManager->clearFilteringMetadata(); + if (!isNamespaceAlwaysUnsharded(_nss)) { + _metadataManager->clearFilteringMetadata(); + } } auto CollectionShardingRuntime::beginReceive(ChunkRange const& range) -> CleanupNotification { diff --git a/src/mongo/db/s/collection_sharding_state.cpp b/src/mongo/db/s/collection_sharding_state.cpp index 45645383969..f65f262a029 100644 --- a/src/mongo/db/s/collection_sharding_state.cpp +++ b/src/mongo/db/s/collection_sharding_state.cpp @@ -34,6 +34,7 @@ #include "mongo/db/s/collection_sharding_state.h" +#include "mongo/db/repl/read_concern_args.h" #include "mongo/db/s/operation_sharding_state.h" #include "mongo/db/s/sharded_connection_info.h" #include "mongo/s/stale_exception.h" @@ -72,8 +73,9 @@ public: stdx::lock_guard<stdx::mutex> lg(_mutex); for (auto& coll : _collections) { - ScopedCollectionMetadata metadata = coll.second->getMetadata(opCtx); - if (metadata->isSharded()) { + const auto optMetadata = coll.second->getCurrentMetadataIfKnown(); + if (optMetadata) { + const auto& metadata = *optMetadata; versionB.appendTimestamp(coll.first, metadata->getShardVersion().toLong()); } } @@ -109,6 +111,28 @@ private: const auto kUnshardedCollection = std::make_shared<UnshardedCollection>(); +ChunkVersion getOperationReceivedVersion(OperationContext* opCtx, const NamespaceString& nss) { + auto& oss = OperationShardingState::get(opCtx); + + // If there is a version attached to the OperationContext, use it as the received version, + // otherwise get the received version from the ShardedConnectionInfo + if (oss.hasShardVersion()) { + return oss.getShardVersion(nss); + } else if (auto const info = 
ShardedConnectionInfo::get(opCtx->getClient(), false)) { + auto connectionShardVersion = info->getVersion(nss.ns()); + + // For backwards compatibility with map/reduce, which can access up to 2 sharded collections + // in a single call, the lack of version for a namespace on the collection must be treated + // as UNSHARDED + return connectionShardVersion.value_or(ChunkVersion::UNSHARDED()); + } else { + // There is no shard version information on either 'opCtx' or 'client'. This means that the + // operation represented by 'opCtx' is unversioned, and the shard version is always OK for + // unversioned operations. + return ChunkVersion::IGNORED(); + } +} + } // namespace CollectionShardingState::CollectionShardingState(NamespaceString nss) : _nss(std::move(nss)) {} @@ -127,37 +151,49 @@ void CollectionShardingState::report(OperationContext* opCtx, BSONObjBuilder* bu collectionsMap->report(opCtx, builder); } -ScopedCollectionMetadata CollectionShardingState::getMetadata(OperationContext* opCtx) { +ScopedCollectionMetadata CollectionShardingState::getMetadataForOperation(OperationContext* opCtx) { + const auto receivedShardVersion = getOperationReceivedVersion(opCtx, _nss); + + if (ChunkVersion::isIgnoredVersion(receivedShardVersion)) { + return {kUnshardedCollection}; + } + const auto atClusterTime = repl::ReadConcernArgs::get(opCtx).getArgsAtClusterTime(); auto optMetadata = _getMetadata(atClusterTime); + if (!optMetadata) return {kUnshardedCollection}; - return std::move(*optMetadata); + return {std::move(*optMetadata)}; } -void CollectionShardingState::checkShardVersionOrThrow(OperationContext* opCtx) { - auto& oss = OperationShardingState::get(opCtx); +ScopedCollectionMetadata CollectionShardingState::getCurrentMetadata() { + auto optMetadata = _getMetadata(boost::none); - const auto receivedShardVersion = [&] { - // If there is a version attached to the OperationContext, use it as the received version, - // otherwise get the received version from the ShardedConnectionInfo - if (oss.hasShardVersion()) { - return oss.getShardVersion(_nss); - } else if (auto const info = ShardedConnectionInfo::get(opCtx->getClient(), false)) { - auto connectionShardVersion = info->getVersion(_nss.ns()); - - // For backwards compatibility with map/reduce, which can access up to 2 sharded - // collections in a single call, the lack of version for a namespace on the collection - // must be treated as UNSHARDED - return connectionShardVersion.value_or(ChunkVersion::UNSHARDED()); - } else { - // There is no shard version information on either 'opCtx' or 'client'. This means that - // the operation represented by 'opCtx' is unversioned, and the shard version is always - // OK for unversioned operations. 
- return ChunkVersion::IGNORED(); - } - }(); + if (!optMetadata) + return {kUnshardedCollection}; + + return {std::move(*optMetadata)}; +} + +boost::optional<ScopedCollectionMetadata> CollectionShardingState::getCurrentMetadataIfKnown() { + return _getMetadata(boost::none); +} + +boost::optional<ChunkVersion> CollectionShardingState::getCurrentShardVersionIfKnown() { + const auto optMetadata = _getMetadata(boost::none); + if (!optMetadata) + return boost::none; + + const auto& metadata = *optMetadata; + if (!metadata->isSharded()) + return ChunkVersion::UNSHARDED(); + + return metadata->getCollVersion(); +} + +void CollectionShardingState::checkShardVersionOrThrow(OperationContext* opCtx) { + const auto receivedShardVersion = getOperationReceivedVersion(opCtx, _nss); if (ChunkVersion::isIgnoredVersion(receivedShardVersion)) { return; @@ -167,8 +203,7 @@ void CollectionShardingState::checkShardVersionOrThrow(OperationContext* opCtx) invariant(repl::ReadConcernArgs::get(opCtx).getLevel() != repl::ReadConcernLevel::kAvailableReadConcern); - // Set this for error messaging purposes before potentially returning false. - auto metadata = getMetadata(opCtx); + const auto metadata = getMetadataForOperation(opCtx); const auto wantedShardVersion = metadata->isSharded() ? metadata->getShardVersion() : ChunkVersion::UNSHARDED(); @@ -178,6 +213,7 @@ void CollectionShardingState::checkShardVersionOrThrow(OperationContext* opCtx) if (criticalSectionSignal) { // Set migration critical section on operation sharding state: operation will wait for the // migration to finish before returning failure and retrying. + auto& oss = OperationShardingState::get(opCtx); oss.setMigrationCriticalSectionSignal(criticalSectionSignal); uasserted(StaleConfigInfo(_nss, receivedShardVersion, wantedShardVersion), diff --git a/src/mongo/db/s/collection_sharding_state.h b/src/mongo/db/s/collection_sharding_state.h index 77e11534f73..e964dbced12 100644 --- a/src/mongo/db/s/collection_sharding_state.h +++ b/src/mongo/db/s/collection_sharding_state.h @@ -73,14 +73,28 @@ public: static void report(OperationContext* opCtx, BSONObjBuilder* builder); /** - * Returns the chunk filtering metadata for the collection. The returned object is safe to - * access outside of collection lock. + * Returns the chunk filtering metadata that the current operation should be using for that + * collection or otherwise throws if it has not been loaded yet. If the operation does not + * require a specific shard version, returns an UNSHARDED metadata. The returned object is safe + * to access outside of collection lock. * * If the operation context contains an 'atClusterTime' property, the returned filtering * metadata will be tied to a specific point in time. Otherwise it will reference the latest * time available. */ - ScopedCollectionMetadata getMetadata(OperationContext* opCtx); + ScopedCollectionMetadata getMetadataForOperation(OperationContext* opCtx); + ScopedCollectionMetadata getCurrentMetadata(); + + /** + * Returns boost::none if the filtering metadata for the collection is not known yet. Otherwise + * returns the most recently refreshed from the config server metadata or shard version. + * + * These methods do not check for the shard version that the operation requires and should only + * be used for cases such as checking whether a particular config server update has taken + * effect. 
+ */ + boost::optional<ScopedCollectionMetadata> getCurrentMetadataIfKnown(); + boost::optional<ChunkVersion> getCurrentShardVersionIfKnown(); /** * Checks whether the shard version in the operation context is compatible with the shard diff --git a/src/mongo/db/s/collection_sharding_state_test.cpp b/src/mongo/db/s/collection_sharding_state_test.cpp index f5c4a3aad43..a8361bd1490 100644 --- a/src/mongo/db/s/collection_sharding_state_test.cpp +++ b/src/mongo/db/s/collection_sharding_state_test.cpp @@ -33,6 +33,7 @@ #include "mongo/db/catalog_raii.h" #include "mongo/db/s/collection_sharding_runtime.h" #include "mongo/db/s/op_observer_sharding_impl.h" +#include "mongo/db/s/operation_sharding_state.h" #include "mongo/db/s/type_shard_identity.h" #include "mongo/s/shard_server_test_fixture.h" @@ -58,12 +59,25 @@ CollectionMetadata makeAMetadata(BSONObj const& keyPattern) { return CollectionMetadata(std::move(cm), ShardId("this")); } -using DeleteStateTest = ShardServerTestFixture; +class DeleteStateTest : public ShardServerTestFixture { +protected: + void setCollectionFilteringMetadata(CollectionMetadata metadata) { + AutoGetCollection autoColl(operationContext(), kTestNss, MODE_X); + auto* const css = CollectionShardingRuntime::get(operationContext(), kTestNss); + css->setFilteringMetadata(operationContext(), std::move(metadata)); + + auto& oss = OperationShardingState::get(operationContext()); + const auto version = metadata.getShardVersion(); + BSONObjBuilder builder; + version.appendToCommand(&builder); + oss.initializeClientRoutingVersions(kTestNss, builder.obj()); + } +}; TEST_F(DeleteStateTest, MakeDeleteStateUnsharded) { + setCollectionFilteringMetadata(CollectionMetadata()); + AutoGetCollection autoColl(operationContext(), kTestNss, MODE_IX); - auto* const css = CollectionShardingRuntime::get(operationContext(), kTestNss); - css->setFilteringMetadata(operationContext(), CollectionMetadata()); auto doc = BSON("key3" << "abc" @@ -83,11 +97,10 @@ TEST_F(DeleteStateTest, MakeDeleteStateUnsharded) { } TEST_F(DeleteStateTest, MakeDeleteStateShardedWithoutIdInShardKey) { - AutoGetCollection autoColl(operationContext(), kTestNss, MODE_IX); - auto* const css = CollectionShardingRuntime::get(operationContext(), kTestNss); - // Push a CollectionMetadata with a shard key not including "_id"... - css->setFilteringMetadata(operationContext(), makeAMetadata(BSON("key" << 1 << "key3" << 1))); + setCollectionFilteringMetadata(makeAMetadata(BSON("key" << 1 << "key3" << 1))); + + AutoGetCollection autoColl(operationContext(), kTestNss, MODE_IX); // The order of fields in `doc` deliberately does not match the shard key auto doc = BSON("key3" @@ -109,12 +122,10 @@ TEST_F(DeleteStateTest, MakeDeleteStateShardedWithoutIdInShardKey) { } TEST_F(DeleteStateTest, MakeDeleteStateShardedWithIdInShardKey) { - AutoGetCollection autoColl(operationContext(), kTestNss, MODE_IX); - auto* const css = CollectionShardingRuntime::get(operationContext(), kTestNss); - // Push a CollectionMetadata with a shard key that does have "_id" in the middle... 
- css->setFilteringMetadata(operationContext(), - makeAMetadata(BSON("key" << 1 << "_id" << 1 << "key2" << 1))); + setCollectionFilteringMetadata(makeAMetadata(BSON("key" << 1 << "_id" << 1 << "key2" << 1))); + + AutoGetCollection autoColl(operationContext(), kTestNss, MODE_IX); // The order of fields in `doc` deliberately does not match the shard key auto doc = BSON("key2" << true << "key3" @@ -134,13 +145,11 @@ TEST_F(DeleteStateTest, MakeDeleteStateShardedWithIdInShardKey) { } TEST_F(DeleteStateTest, MakeDeleteStateShardedWithIdHashInShardKey) { - AutoGetCollection autoColl(operationContext(), kTestNss, MODE_IX); - auto* const css = CollectionShardingRuntime::get(operationContext(), kTestNss); - // Push a CollectionMetadata with a shard key "_id", hashed. - auto aMetadata = makeAMetadata(BSON("_id" - << "hashed")); - css->setFilteringMetadata(operationContext(), std::move(aMetadata)); + setCollectionFilteringMetadata(makeAMetadata(BSON("_id" + << "hashed"))); + + AutoGetCollection autoColl(operationContext(), kTestNss, MODE_IX); auto doc = BSON("key2" << true << "_id" << "hello" diff --git a/src/mongo/db/s/get_shard_version_command.cpp b/src/mongo/db/s/get_shard_version_command.cpp index a67a90efafb..a51ce749549 100644 --- a/src/mongo/db/s/get_shard_version_command.cpp +++ b/src/mongo/db/s/get_shard_version_command.cpp @@ -111,27 +111,37 @@ public: AutoGetCollection autoColl(opCtx, nss, MODE_IS); auto* const css = CollectionShardingRuntime::get(opCtx, nss); - const auto metadata = css->getMetadata(opCtx); - if (metadata->isSharded()) { - result.appendTimestamp("global", metadata->getShardVersion().toLong()); + const auto optMetadata = css->getCurrentMetadataIfKnown(); + if (!optMetadata) { + result.append("global", "UNKNOWN"); + + if (cmdObj["fullMetadata"].trueValue()) { + result.append("metadata", BSONObj()); + } } else { - result.appendTimestamp("global", ChunkVersion::UNSHARDED().toLong()); - } + const auto& metadata = *optMetadata; - if (cmdObj["fullMetadata"].trueValue()) { - BSONObjBuilder metadataBuilder(result.subobjStart("metadata")); if (metadata->isSharded()) { - metadata->toBSONBasic(metadataBuilder); + result.appendTimestamp("global", metadata->getShardVersion().toLong()); + } else { + result.appendTimestamp("global", ChunkVersion::UNSHARDED().toLong()); + } + + if (cmdObj["fullMetadata"].trueValue()) { + BSONObjBuilder metadataBuilder(result.subobjStart("metadata")); + if (metadata->isSharded()) { + metadata->toBSONBasic(metadataBuilder); - BSONArrayBuilder chunksArr(metadataBuilder.subarrayStart("chunks")); - metadata->toBSONChunks(chunksArr); - chunksArr.doneFast(); + BSONArrayBuilder chunksArr(metadataBuilder.subarrayStart("chunks")); + metadata->toBSONChunks(chunksArr); + chunksArr.doneFast(); - BSONArrayBuilder pendingArr(metadataBuilder.subarrayStart("pending")); - css->toBSONPending(pendingArr); - pendingArr.doneFast(); + BSONArrayBuilder pendingArr(metadataBuilder.subarrayStart("pending")); + css->toBSONPending(pendingArr); + pendingArr.doneFast(); + } + metadataBuilder.doneFast(); } - metadataBuilder.doneFast(); } return true; diff --git a/src/mongo/db/s/merge_chunks_command.cpp b/src/mongo/db/s/merge_chunks_command.cpp index a4d42284520..1208f5a8b52 100644 --- a/src/mongo/db/s/merge_chunks_command.cpp +++ b/src/mongo/db/s/merge_chunks_command.cpp @@ -51,55 +51,46 @@ #include "mongo/util/mongoutils/str.h" namespace mongo { - -using std::string; -using std::vector; -using str::stream; - namespace { bool checkMetadataForSuccess(OperationContext* opCtx, const 
NamespaceString& nss, - const BSONObj& minKey, - const BSONObj& maxKey) { + const OID& epoch, + const ChunkRange& chunkRange) { const auto metadataAfterMerge = [&] { AutoGetCollection autoColl(opCtx, nss, MODE_IS); - return CollectionShardingState::get(opCtx, nss)->getMetadata(opCtx); + return CollectionShardingState::get(opCtx, nss)->getCurrentMetadata(); }(); uassert(ErrorCodes::StaleEpoch, - str::stream() << "Collection " << nss.ns() << " became unsharded", - metadataAfterMerge->isSharded()); + str::stream() << "Collection " << nss.ns() << " changed since merge start", + metadataAfterMerge->getCollVersion().epoch() == epoch); ChunkType chunk; - if (!metadataAfterMerge->getNextChunk(minKey, &chunk)) { + if (!metadataAfterMerge->getNextChunk(chunkRange.getMin(), &chunk)) { return false; } - return chunk.getMin().woCompare(minKey) == 0 && chunk.getMax().woCompare(maxKey) == 0; + return chunk.getMin().woCompare(chunkRange.getMin()) == 0 && + chunk.getMax().woCompare(chunkRange.getMax()) == 0; } -Status mergeChunks(OperationContext* opCtx, - const NamespaceString& nss, - const BSONObj& minKey, - const BSONObj& maxKey, - const OID& epoch) { - // Get the distributed lock - // TODO(SERVER-25086): Remove distLock acquisition from merge chunk - const string whyMessage = stream() << "merging chunks in " << nss.ns() << " from " << minKey - << " to " << maxKey; - - auto scopedDistLock = Grid::get(opCtx)->catalogClient()->getDistLockManager()->lock( - opCtx, nss.ns(), whyMessage, DistLockManager::kSingleLockAttemptTimeout); - - if (!scopedDistLock.isOK()) { - std::string context = stream() << "could not acquire collection lock for " << nss.ns() - << " to merge chunks in [" << redact(minKey) << ", " - << redact(maxKey) << ")"; - - warning() << context << causedBy(scopedDistLock.getStatus()); - return scopedDistLock.getStatus().withContext(context); - } +void mergeChunks(OperationContext* opCtx, + const NamespaceString& nss, + const BSONObj& minKey, + const BSONObj& maxKey, + const OID& epoch) { + const std::string whyMessage = str::stream() << "merging chunks in " << nss.ns() << " from " + << minKey << " to " << maxKey; + auto scopedDistLock = uassertStatusOKWithContext( + Grid::get(opCtx)->catalogClient()->getDistLockManager()->lock( + opCtx, nss.ns(), whyMessage, DistLockManager::kSingleLockAttemptTimeout), + str::stream() << "could not acquire collection lock for " << nss.ns() + << " to merge chunks in [" + << redact(minKey) + << ", " + << redact(maxKey) + << ")"); auto const shardingState = ShardingState::get(opCtx); @@ -109,39 +100,32 @@ Status mergeChunks(OperationContext* opCtx, const auto metadata = [&] { AutoGetCollection autoColl(opCtx, nss, MODE_IS); - return CollectionShardingState::get(opCtx, nss)->getMetadata(opCtx); + return CollectionShardingState::get(opCtx, nss)->getCurrentMetadata(); }(); - if (!metadata->isSharded()) { - std::string errmsg = stream() << "could not merge chunks, collection " << nss.ns() - << " is not sharded"; - - warning() << errmsg; - return {ErrorCodes::StaleEpoch, errmsg}; - } + uassert(ErrorCodes::StaleEpoch, + str::stream() << "Collection " << nss.ns() << " became unsharded", + metadata->isSharded()); const auto shardVersion = metadata->getShardVersion(); - - if (epoch.isSet() && shardVersion.epoch() != epoch) { - std::string errmsg = stream() - << "could not merge chunks, collection " << nss.ns() - << " has changed since merge was sent (sent epoch: " << epoch.toString() - << ", current epoch: " << shardVersion.epoch() << ")"; - - warning() << errmsg; - return 
{ErrorCodes::StaleEpoch, errmsg}; - } - - if (!metadata->isValidKey(minKey) || !metadata->isValidKey(maxKey)) { - std::string errmsg = stream() << "could not merge chunks, the range " - << redact(ChunkRange(minKey, maxKey).toString()) - << " is not valid" - << " for collection " << nss.ns() << " with key pattern " - << metadata->getKeyPattern().toString(); - - warning() << errmsg; - return Status(ErrorCodes::IllegalOperation, errmsg); - } + uassert(ErrorCodes::StaleEpoch, + str::stream() << "could not merge chunks, collection " << nss.ns() + << " has changed since merge was sent (sent epoch: " + << epoch.toString() + << ", current epoch: " + << shardVersion.epoch() + << ")", + !epoch.isSet() || shardVersion.epoch() == epoch); + + uassert(ErrorCodes::IllegalOperation, + str::stream() << "could not merge chunks, the range " + << redact(ChunkRange(minKey, maxKey).toString()) + << " is not valid" + << " for collection " + << nss.ns() + << " with key pattern " + << metadata->getKeyPattern().toString(), + metadata->isValidKey(minKey) && metadata->isValidKey(maxKey)); // // Get merged chunk information @@ -160,15 +144,15 @@ Status mergeChunks(OperationContext* opCtx, chunksToMerge.push_back(itChunk); } - if (chunksToMerge.empty()) { - std::string errmsg = stream() - << "could not merge chunks, collection " << nss.ns() << " range starting at " - << redact(minKey) << " and ending at " << redact(maxKey) << " does not belong to shard " - << shardingState->shardId(); - - warning() << errmsg; - return Status(ErrorCodes::IllegalOperation, errmsg); - } + uassert(ErrorCodes::IllegalOperation, + str::stream() << "could not merge chunks, collection " << nss.ns() + << " range starting at " + << redact(minKey) + << " and ending at " + << redact(maxKey) + << " does not belong to shard " + << shardingState->shardId(), + !chunksToMerge.empty()); // // Validate the range starts and ends at chunks and has no holes, error if not valid @@ -179,65 +163,56 @@ Status mergeChunks(OperationContext* opCtx, // minKey is inclusive bool minKeyInRange = rangeContains(firstDocMin, firstDocMax, minKey); - if (!minKeyInRange) { - std::string errmsg = stream() << "could not merge chunks, collection " << nss.ns() - << " range starting at " << redact(minKey) - << " does not belong to shard " << shardingState->shardId(); - - warning() << errmsg; - return Status(ErrorCodes::IllegalOperation, errmsg); - } + uassert(ErrorCodes::IllegalOperation, + str::stream() << "could not merge chunks, collection " << nss.ns() + << " range starting at " + << redact(minKey) + << " does not belong to shard " + << shardingState->shardId(), + minKeyInRange); BSONObj lastDocMin = chunksToMerge.back().getMin(); BSONObj lastDocMax = chunksToMerge.back().getMax(); // maxKey is exclusive bool maxKeyInRange = lastDocMin.woCompare(maxKey) < 0 && lastDocMax.woCompare(maxKey) >= 0; - if (!maxKeyInRange) { - std::string errmsg = stream() << "could not merge chunks, collection " << nss.ns() - << " range ending at " << redact(maxKey) - << " does not belong to shard " << shardingState->shardId(); - - warning() << errmsg; - return Status(ErrorCodes::IllegalOperation, errmsg); - } + uassert(ErrorCodes::IllegalOperation, + str::stream() << "could not merge chunks, collection " << nss.ns() + << " range ending at " + << redact(maxKey) + << " does not belong to shard " + << shardingState->shardId(), + maxKeyInRange); bool validRangeStartKey = firstDocMin.woCompare(minKey) == 0; bool validRangeEndKey = lastDocMax.woCompare(maxKey) == 0; - if (!validRangeStartKey || 
!validRangeEndKey) { - std::string errmsg = stream() - << "could not merge chunks, collection " << nss.ns() << " does not contain a chunk " - << (!validRangeStartKey ? "starting at " + redact(minKey.toString()) : "") - << (!validRangeStartKey && !validRangeEndKey ? " or " : "") - << (!validRangeEndKey ? "ending at " + redact(maxKey.toString()) : ""); - - warning() << errmsg; - return Status(ErrorCodes::IllegalOperation, errmsg); - } - - if (chunksToMerge.size() == 1) { - std::string errmsg = stream() << "could not merge chunks, collection " << nss.ns() - << " already contains chunk for " - << redact(ChunkRange(minKey, maxKey).toString()); - - warning() << errmsg; - return Status(ErrorCodes::IllegalOperation, errmsg); - } + uassert(ErrorCodes::IllegalOperation, + str::stream() << "could not merge chunks, collection " << nss.ns() + << " does not contain a chunk " + << (!validRangeStartKey ? "starting at " + redact(minKey.toString()) : "") + << (!validRangeStartKey && !validRangeEndKey ? " or " : "") + << (!validRangeEndKey ? "ending at " + redact(maxKey.toString()) : ""), + validRangeStartKey && validRangeEndKey); + uassert(ErrorCodes::IllegalOperation, + str::stream() << "could not merge chunks, collection " << nss.ns() + << " already contains chunk for " + << ChunkRange(minKey, maxKey).toString(), + chunksToMerge.size() > 1); // Look for hole in range for (size_t i = 1; i < chunksToMerge.size(); ++i) { - if (chunksToMerge[i - 1].getMax().woCompare(chunksToMerge[i].getMin()) != 0) { - std::string errmsg = stream() - << "could not merge chunks, collection " << nss.ns() << " has a hole in the range " - << redact(ChunkRange(minKey, maxKey).toString()) << " at " - << redact(ChunkRange(chunksToMerge[i - 1].getMax(), chunksToMerge[i].getMin()) - .toString()); - - warning() << errmsg; - return Status(ErrorCodes::IllegalOperation, errmsg); - } + uassert( + ErrorCodes::IllegalOperation, + str::stream() + << "could not merge chunks, collection " + << nss.ns() + << " has a hole in the range " + << ChunkRange(minKey, maxKey).toString() + << " at " + << ChunkRange(chunksToMerge[i - 1].getMax(), chunksToMerge[i].getMin()).toString(), + chunksToMerge[i - 1].getMax().woCompare(chunksToMerge[i].getMin()) == 0); } // @@ -251,42 +226,33 @@ Status mergeChunks(OperationContext* opCtx, auto configCmdObj = request.toConfigCommandBSON(ShardingCatalogClient::kMajorityWriteConcern.toBSON()); - auto cmdResponseStatus = Grid::get(opCtx)->shardRegistry()->getConfigShard()->runCommand( - opCtx, - ReadPreferenceSetting{ReadPreference::PrimaryOnly}, - "admin", - configCmdObj, - Shard::RetryPolicy::kIdempotent); + auto cmdResponse = + uassertStatusOK(Grid::get(opCtx)->shardRegistry()->getConfigShard()->runCommand( + opCtx, + ReadPreferenceSetting{ReadPreference::PrimaryOnly}, + "admin", + configCmdObj, + Shard::RetryPolicy::kIdempotent)); // Refresh metadata to pick up new chunk definitions (regardless of the results returned from // running _configsvrCommitChunkMerge). forceShardFilteringMetadataRefresh(opCtx, nss); - // If we failed to get any response from the config server at all, despite retries, then we - // should just go ahead and fail the whole operation. - if (!cmdResponseStatus.isOK()) { - return cmdResponseStatus.getStatus(); - } - // If _configsvrCommitChunkMerge returned an error, look at this shard's metadata to determine // if the merge actually did happen. 
This can happen if there's a network error getting the // response from the first call to _configsvrCommitChunkMerge, but it actually succeeds, thus // the automatic retry fails with a precondition violation, for example. - auto commandStatus = std::move(cmdResponseStatus.getValue().commandStatus); - auto writeConcernStatus = std::move(cmdResponseStatus.getValue().writeConcernStatus); + auto commandStatus = std::move(cmdResponse.commandStatus); + auto writeConcernStatus = std::move(cmdResponse.writeConcernStatus); if ((!commandStatus.isOK() || !writeConcernStatus.isOK()) && - checkMetadataForSuccess(opCtx, nss, minKey, maxKey)) { - + checkMetadataForSuccess(opCtx, nss, epoch, ChunkRange(minKey, maxKey))) { LOG(1) << "mergeChunk [" << redact(minKey) << "," << redact(maxKey) << ") has already been committed."; - } else if (!commandStatus.isOK()) { - return commandStatus.withContext("Failed to commit chunk merge"); - } else if (!writeConcernStatus.isOK()) { - return writeConcernStatus.withContext("Failed to commit chunk merge"); } - return Status::OK(); + uassertStatusOKWithContext(commandStatus, "Failed to commit chunk merge"); + uassertStatusOKWithContext(writeConcernStatus, "Failed to commit chunk merge"); } class MergeChunksCommand : public ErrmsgCommandDeprecated { @@ -325,22 +291,22 @@ public: } // Required - static BSONField<string> nsField; - static BSONField<vector<BSONObj>> boundsField; + static BSONField<std::string> nsField; + static BSONField<std::vector<BSONObj>> boundsField; // Optional, if the merge is only valid for a particular epoch static BSONField<OID> epochField; bool errmsgRun(OperationContext* opCtx, - const string& dbname, + const std::string& dbname, const BSONObj& cmdObj, - string& errmsg, + std::string& errmsg, BSONObjBuilder& result) override { uassertStatusOK(ShardingState::get(opCtx)->canAcceptShardedCommands()); const NamespaceString nss(parseNs(dbname, cmdObj)); - vector<BSONObj> bounds; + std::vector<BSONObj> bounds; if (!FieldParser::extract(cmdObj, boundsField, &bounds, &errmsg)) { return false; } @@ -374,15 +340,14 @@ public: return false; } - auto mergeStatus = mergeChunks(opCtx, nss, minKey, maxKey, epoch); - uassertStatusOK(mergeStatus); + mergeChunks(opCtx, nss, minKey, maxKey, epoch); return true; } } mergeChunksCmd; -BSONField<string> MergeChunksCommand::nsField("mergeChunks"); -BSONField<vector<BSONObj>> MergeChunksCommand::boundsField("bounds"); +BSONField<std::string> MergeChunksCommand::nsField("mergeChunks"); +BSONField<std::vector<BSONObj>> MergeChunksCommand::boundsField("bounds"); BSONField<OID> MergeChunksCommand::epochField("epoch"); } // namespace diff --git a/src/mongo/db/s/migration_destination_manager.cpp b/src/mongo/db/s/migration_destination_manager.cpp index 6bf3f7718b3..49a350d096b 100644 --- a/src/mongo/db/s/migration_destination_manager.cpp +++ b/src/mongo/db/s/migration_destination_manager.cpp @@ -1161,14 +1161,14 @@ CollectionShardingRuntime::CleanupNotification MigrationDestinationManager::_not AutoGetCollection autoColl(opCtx, _nss, MODE_IX, MODE_X); auto* const css = CollectionShardingRuntime::get(opCtx, _nss); - - auto metadata = css->getMetadata(opCtx); + const auto optMetadata = css->getCurrentMetadataIfKnown(); // This can currently happen because drops aren't synchronized with in-migrations. The idea for // checking this here is that in the future we shouldn't have this problem. 
-   if (!metadata->isSharded() || metadata->getCollVersion().epoch() != _epoch) {
+   if (!optMetadata || !(*optMetadata)->isSharded() ||
+       (*optMetadata)->getCollVersion().epoch() != _epoch) {
        return Status{ErrorCodes::StaleShardVersion,
-                     str::stream() << "not noting chunk " << redact(range.toString())
+                     str::stream() << "Not marking chunk " << redact(range.toString())
                                    << " as pending because the epoch of " << _nss.ns()
                                    << " changed"};
@@ -1192,14 +1192,14 @@ void MigrationDestinationManager::_forgetPending(OperationContext* opCtx, ChunkR
    UninterruptibleLockGuard noInterrupt(opCtx->lockState());
    AutoGetCollection autoColl(opCtx, _nss, MODE_IX, MODE_X);
    auto* const css = CollectionShardingRuntime::get(opCtx, _nss);
-
-   auto metadata = css->getMetadata(opCtx);
+   const auto optMetadata = css->getCurrentMetadataIfKnown();
    // This can currently happen because drops aren't synchronized with in-migrations. The idea for
    // checking this here is that in the future we shouldn't have this problem.
-   if (!metadata->isSharded() || metadata->getCollVersion().epoch() != _epoch) {
-       log() << "no need to forget pending chunk " << redact(range.toString())
-             << " because the epoch for " << _nss.ns() << " changed";
+   if (!optMetadata || !(*optMetadata)->isSharded() ||
+       (*optMetadata)->getCollVersion().epoch() != _epoch) {
+       LOG(0) << "No need to forget pending chunk " << redact(range.toString())
+              << " because the epoch for " << _nss.ns() << " changed";
        return;
    }
diff --git a/src/mongo/db/s/migration_source_manager.cpp b/src/mongo/db/s/migration_source_manager.cpp
index 57937d9c5fe..15ae251d65f 100644
--- a/src/mongo/db/s/migration_source_manager.cpp
+++ b/src/mongo/db/s/migration_source_manager.cpp
@@ -112,19 +112,6 @@ void refreshRecipientRoutingTable(OperationContext* opCtx,
    executor->scheduleRemoteCommand(request, noOp).getStatus().ignore();
}
-Status checkCollectionEpochMatches(const ScopedCollectionMetadata& metadata, OID expectedEpoch) {
-   if (metadata->isSharded() && metadata->getCollVersion().epoch() == expectedEpoch)
-       return Status::OK();
-
-   return {ErrorCodes::IncompatibleShardingMetadata,
-           str::stream() << "The collection was dropped or recreated since the migration began. "
-                         << "Expected collection epoch: "
-                         << expectedEpoch.toString()
-                         << ", but found: "
-                         << (metadata->isSharded() ? metadata->getCollVersion().epoch().toString()
-                                                   : "unsharded collection.")};
-}
-
}  // namespace
MONGO_FAIL_POINT_DEFINE(doNotRefreshRecipientAfterCommit);
@@ -170,9 +157,15 @@ MigrationSourceManager::MigrationSourceManager(OperationContext* opCtx,
        collectionUUID = autoColl.getCollection()->uuid().value();
    }
-   auto metadata = CollectionShardingState::get(opCtx, getNss())->getMetadata(opCtx);
+   auto optMetadata =
+       CollectionShardingState::get(opCtx, getNss())->getCurrentMetadataIfKnown();
+   uassert(ErrorCodes::ConflictingOperationInProgress,
+           "The collection's sharding state was cleared by a concurrent operation",
+           optMetadata);
+
+   auto& metadata = *optMetadata;
    uassert(ErrorCodes::IncompatibleShardingMetadata,
-           str::stream() << "cannot move chunks for an unsharded collection",
+           "Cannot move chunks for an unsharded collection",
            metadata->isSharded());
    return std::make_tuple(std::move(metadata), std::move(collectionUUID));
@@ -241,14 +234,7 @@ Status MigrationSourceManager::startClone(OperationContext* opCtx) {
    {
        // Register for notifications from the replication subsystem
-       UninterruptibleLockGuard noInterrupt(opCtx->lockState());
-       AutoGetCollection autoColl(opCtx, getNss(), MODE_IX, MODE_X);
-       auto* const css = CollectionShardingRuntime::get(opCtx, getNss());
-
-       const auto metadata = css->getMetadata(opCtx);
-       Status status = checkCollectionEpochMatches(metadata, _collectionEpoch);
-       if (!status.isOK())
-           return status;
+       const auto metadata = _getCurrentMetadataAndCheckEpoch(opCtx);
        // Having the metadata manager registered on the collection sharding state is what indicates
        // that a chunk on that collection is being migrated. With an active migration, write
@@ -257,6 +243,9 @@ Status MigrationSourceManager::startClone(OperationContext* opCtx) {
        _cloneDriver = stdx::make_unique<MigrationChunkClonerSourceLegacy>(
            _args, metadata->getKeyPattern(), _donorConnStr, _recipientHost);
+       UninterruptibleLockGuard noInterrupt(opCtx->lockState());
+       AutoGetCollection autoColl(opCtx, getNss(), MODE_IX, MODE_X);
+       auto* const css = CollectionShardingRuntime::get(opCtx, getNss());
        invariant(nullptr == std::exchange(msmForCsr(css), this));
        _state = kCloning;
    }
@@ -296,19 +285,7 @@ Status MigrationSourceManager::enterCriticalSection(OperationContext* opCtx) {
    _stats.totalDonorChunkCloneTimeMillis.addAndFetch(_cloneAndCommitTimer.millis());
    _cloneAndCommitTimer.reset();
-   {
-       const auto metadata = [&] {
-           UninterruptibleLockGuard noInterrupt(opCtx->lockState());
-           AutoGetCollection autoColl(opCtx, _args.getNss(), MODE_IS);
-           return CollectionShardingState::get(opCtx, _args.getNss())->getMetadata(opCtx);
-       }();
-
-       Status status = checkCollectionEpochMatches(metadata, _collectionEpoch);
-       if (!status.isOK())
-           return status;
-
-       _notifyChangeStreamsOnRecipientFirstChunk(opCtx, metadata);
-   }
+   _notifyChangeStreamsOnRecipientFirstChunk(opCtx, _getCurrentMetadataAndCheckEpoch(opCtx));
    // Mark the shard as running critical operation, which requires recovery on crash.
    //
@@ -384,15 +361,7 @@ Status MigrationSourceManager::commitChunkMetadataOnConfig(OperationContext* opC
    BSONObjBuilder builder;
    {
-       const auto metadata = [&] {
-           UninterruptibleLockGuard noInterrupt(opCtx->lockState());
-           AutoGetCollection autoColl(opCtx, _args.getNss(), MODE_IS);
-           return CollectionShardingState::get(opCtx, _args.getNss())->getMetadata(opCtx);
-       }();
-
-       Status status = checkCollectionEpochMatches(metadata, _collectionEpoch);
-       if (!status.isOK())
-           return status;
+       const auto metadata = _getCurrentMetadataAndCheckEpoch(opCtx);
        boost::optional<ChunkType> controlChunkType = boost::none;
        ChunkType differentChunk;
@@ -535,18 +504,7 @@ Status MigrationSourceManager::commitChunkMetadataOnConfig(OperationContext* opC
                              << "' after commit failed");
    }
-   auto refreshedMetadata = [&] {
-       UninterruptibleLockGuard noInterrupt(opCtx->lockState());
-       AutoGetCollection autoColl(opCtx, getNss(), MODE_IS);
-       return CollectionShardingState::get(opCtx, getNss())->getMetadata(opCtx);
-   }();
-
-   if (!refreshedMetadata->isSharded()) {
-       return {ErrorCodes::NamespaceNotSharded,
-               str::stream() << "Chunk move failed because collection '" << getNss().ns()
-                             << "' is no longer sharded. The migration commit error was: "
-                             << migrationCommitStatus.toString()};
-   }
+   const auto refreshedMetadata = _getCurrentMetadataAndCheckEpoch(opCtx);
    if (refreshedMetadata->keyBelongsToMe(_args.getMinKey())) {
        // This condition may only happen if the migration commit has failed for any reason
@@ -571,8 +529,8 @@ Status MigrationSourceManager::commitChunkMetadataOnConfig(OperationContext* opC
    }
    // Migration succeeded
-   log() << "Migration succeeded and updated collection version to "
-         << refreshedMetadata->getCollVersion();
+   LOG(0) << "Migration succeeded and updated collection version to "
+          << refreshedMetadata->getCollVersion();
    MONGO_FAIL_POINT_PAUSE_WHILE_SET(hangBeforeLeavingCriticalSection);
@@ -661,6 +619,32 @@ void MigrationSourceManager::cleanupOnError(OperationContext* opCtx) {
    }
}
+ScopedCollectionMetadata MigrationSourceManager::_getCurrentMetadataAndCheckEpoch(
+    OperationContext* opCtx) {
+    auto metadata = [&] {
+        UninterruptibleLockGuard noInterrupt(opCtx->lockState());
+        AutoGetCollection autoColl(opCtx, getNss(), MODE_IS);
+        auto* const css = CollectionShardingRuntime::get(opCtx, getNss());
+
+        const auto optMetadata = css->getCurrentMetadataIfKnown();
+        uassert(ErrorCodes::ConflictingOperationInProgress,
+                "The collection's sharding state was cleared by a concurrent operation",
+                optMetadata);
+        return *optMetadata;
+    }();
+
+    uassert(ErrorCodes::ConflictingOperationInProgress,
+            str::stream() << "The collection was dropped or recreated since the migration began. "
+                          << "Expected collection epoch: "
+                          << _collectionEpoch.toString()
+                          << ", but found: "
+                          << (metadata->isSharded() ? metadata->getCollVersion().epoch().toString()
+                                                    : "unsharded collection."),
+            metadata->isSharded() && metadata->getCollVersion().epoch() == _collectionEpoch);
+
+    return metadata;
+}
+
void MigrationSourceManager::_notifyChangeStreamsOnRecipientFirstChunk(
    OperationContext* opCtx, const ScopedCollectionMetadata& metadata) {
    // If this is not the first donation, there is nothing to be done
diff --git a/src/mongo/db/s/migration_source_manager.h b/src/mongo/db/s/migration_source_manager.h
index c053b73736e..eaaacdf0590 100644
--- a/src/mongo/db/s/migration_source_manager.h
+++ b/src/mongo/db/s/migration_source_manager.h
@@ -185,6 +185,8 @@ private:
    // comments explaining the various state transitions.
    enum State { kCreated, kCloning, kCloneCaughtUp, kCriticalSection, kCloneCompleted, kDone };
+   ScopedCollectionMetadata _getCurrentMetadataAndCheckEpoch(OperationContext* opCtx);
+
    /**
     * If this donation moves the first chunk to the recipient (i.e., the recipient didn't have any
     * chunks), this function writes a no-op message to the oplog, so that change stream will notice
diff --git a/src/mongo/db/s/set_shard_version_command.cpp b/src/mongo/db/s/set_shard_version_command.cpp
index 99a7a0b3bb6..2d99d322752 100644
--- a/src/mongo/db/s/set_shard_version_command.cpp
+++ b/src/mongo/db/s/set_shard_version_command.cpp
@@ -232,11 +232,12 @@ public:
        boost::optional<Lock::CollectionLock> collLock;
        collLock.emplace(opCtx->lockState(), nss.ns(), MODE_IS);
-       auto const css = CollectionShardingState::get(opCtx, nss);
+       auto* const css = CollectionShardingState::get(opCtx, nss);
        const ChunkVersion collectionShardVersion = [&] {
-           auto metadata = css->getMetadata(opCtx);
-           return metadata->isSharded() ? metadata->getShardVersion()
-                                        : ChunkVersion::UNSHARDED();
+           auto optMetadata = css->getCurrentMetadataIfKnown();
+           return (optMetadata && (*optMetadata)->isSharded())
+               ? (*optMetadata)->getShardVersion()
+               : ChunkVersion::UNSHARDED();
        }();
        if (requestedVersion.isWriteCompatibleWith(collectionShardVersion)) {
@@ -350,11 +351,13 @@ public:
            {
                AutoGetCollection autoColl(opCtx, nss, MODE_IS);
-               ChunkVersion currVersion = ChunkVersion::UNSHARDED();
-               auto metadata = CollectionShardingState::get(opCtx, nss)->getMetadata(opCtx);
-               if (metadata->isSharded()) {
-                   currVersion = metadata->getShardVersion();
-               }
+               const ChunkVersion currVersion = [&] {
+                   auto* const css = CollectionShardingState::get(opCtx, nss);
+                   auto optMetadata = css->getCurrentMetadataIfKnown();
+                   return (optMetadata && (*optMetadata)->isSharded())
+                       ? (*optMetadata)->getShardVersion()
+                       : ChunkVersion::UNSHARDED();
+               }();
                if (!status.isOK()) {
                    // The reload itself was interrupted or confused here
diff --git a/src/mongo/db/s/shard_filtering_metadata_refresh.cpp b/src/mongo/db/s/shard_filtering_metadata_refresh.cpp
index 1fb93a6213f..fbcc31a0557 100644
--- a/src/mongo/db/s/shard_filtering_metadata_refresh.cpp
+++ b/src/mongo/db/s/shard_filtering_metadata_refresh.cpp
@@ -74,19 +74,16 @@ void onShardVersionMismatch(OperationContext* opCtx,
    const auto currentShardVersion = [&] {
        AutoGetCollection autoColl(opCtx, nss, MODE_IS);
-       const auto currentMetadata = CollectionShardingState::get(opCtx, nss)->getMetadata(opCtx);
-       if (currentMetadata->isSharded()) {
-           return currentMetadata->getShardVersion();
-       }
-
-       return ChunkVersion::UNSHARDED();
+       return CollectionShardingState::get(opCtx, nss)->getCurrentShardVersionIfKnown();
    }();
-   if (currentShardVersion.epoch() == shardVersionReceived.epoch() &&
-       currentShardVersion.majorVersion() >= shardVersionReceived.majorVersion()) {
-       // Don't need to remotely reload if we're in the same epoch and the requested version is
-       // smaller than the one we know about. This means that the remote side is behind.
-       return;
+   if (currentShardVersion) {
+       if (currentShardVersion->epoch() == shardVersionReceived.epoch() &&
+           currentShardVersion->majorVersion() >= shardVersionReceived.majorVersion()) {
+           // Don't need to remotely reload if we're in the same epoch and the requested version is
+           // smaller than the one we know about. This means that the remote side is behind.
+           return;
+       }
    }
    forceShardFilteringMetadataRefresh(opCtx, nss, forceRefreshFromThisThread);
@@ -136,58 +133,69 @@ ChunkVersion forceShardFilteringMetadataRefresh(OperationContext* opCtx,
    invariant(!opCtx->lockState()->isLocked());
    invariant(!opCtx->getClient()->isInDirectClient());
-   auto const shardingState = ShardingState::get(opCtx);
+   auto* const shardingState = ShardingState::get(opCtx);
    invariant(shardingState->canAcceptShardedCommands());
-   const auto routingInfo =
+   auto routingInfo =
        uassertStatusOK(Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfoWithRefresh(
            opCtx, nss, forceRefreshFromThisThread));
-   const auto cm = routingInfo.cm();
+   auto cm = routingInfo.cm();
    if (!cm) {
        // No chunk manager, so unsharded.
        // Exclusive collection lock needed since we're now changing the metadata
        AutoGetCollection autoColl(opCtx, nss, MODE_IX, MODE_X);
-
-       auto* const css = CollectionShardingRuntime::get(opCtx, nss);
-       css->setFilteringMetadata(opCtx, CollectionMetadata());
+       CollectionShardingRuntime::get(opCtx, nss)
+           ->setFilteringMetadata(opCtx, CollectionMetadata());
        return ChunkVersion::UNSHARDED();
    }
+   // Optimistic check with only IS lock in order to avoid threads piling up on the collection X
+   // lock below
    {
        AutoGetCollection autoColl(opCtx, nss, MODE_IS);
-       auto metadata = CollectionShardingState::get(opCtx, nss)->getMetadata(opCtx);
+       auto optMetadata = CollectionShardingState::get(opCtx, nss)->getCurrentMetadataIfKnown();
        // We already have newer version
-       if (metadata->isSharded() &&
-           metadata->getCollVersion().epoch() == cm->getVersion().epoch() &&
-           metadata->getCollVersion() >= cm->getVersion()) {
-           LOG(1) << "Skipping refresh of metadata for " << nss << " "
-                  << metadata->getCollVersion() << " with an older " << cm->getVersion();
-           return metadata->getShardVersion();
+       if (optMetadata) {
+           const auto& metadata = *optMetadata;
+           if (metadata->isSharded() &&
+               metadata->getCollVersion().epoch() == cm->getVersion().epoch() &&
+               metadata->getCollVersion() >= cm->getVersion()) {
+               LOG(1) << "Skipping refresh of metadata for " << nss << " "
+                      << metadata->getCollVersion() << " with an older " << cm->getVersion();
+               return metadata->getShardVersion();
+           }
        }
    }
    // Exclusive collection lock needed since we're now changing the metadata
    AutoGetCollection autoColl(opCtx, nss, MODE_IX, MODE_X);
-
    auto* const css = CollectionShardingRuntime::get(opCtx, nss);
-   auto metadata = css->getMetadata(opCtx);
+   {
+       auto optMetadata = CollectionShardingState::get(opCtx, nss)->getCurrentMetadataIfKnown();
-   // We already have newer version
-   if (metadata->isSharded() && metadata->getCollVersion().epoch() == cm->getVersion().epoch() &&
-       metadata->getCollVersion() >= cm->getVersion()) {
-       LOG(1) << "Skipping refresh of metadata for " << nss << " " << metadata->getCollVersion()
-              << " with an older " << cm->getVersion();
-       return metadata->getShardVersion();
+       // We already have newer version
+       if (optMetadata) {
+           const auto& metadata = *optMetadata;
+           if (metadata->isSharded() &&
+               metadata->getCollVersion().epoch() == cm->getVersion().epoch() &&
+               metadata->getCollVersion() >= cm->getVersion()) {
+               LOG(1) << "Skipping refresh of metadata for " << nss << " "
+                      << metadata->getCollVersion() << " with an older " << cm->getVersion();
+               return metadata->getShardVersion();
+           }
+       }
    }
-   css->setFilteringMetadata(opCtx, CollectionMetadata(cm, shardingState->shardId()));
+   CollectionMetadata metadata(std::move(cm), shardingState->shardId());
+   const auto newShardVersion = metadata.getShardVersion();
-   return css->getMetadata(opCtx)->getShardVersion();
+   css->setFilteringMetadata(opCtx, std::move(metadata));
+   return newShardVersion;
}
Status onDbVersionMismatchNoExcept(
diff --git a/src/mongo/db/s/shard_server_op_observer.cpp b/src/mongo/db/s/shard_server_op_observer.cpp
index 622225a9977..3078fd59396 100644
--- a/src/mongo/db/s/shard_server_op_observer.cpp
+++ b/src/mongo/db/s/shard_server_op_observer.cpp
@@ -206,8 +206,8 @@ void ShardServerOpObserver::onInserts(OperationContext* opCtx,
                                      std::vector<InsertStatement>::const_iterator begin,
                                      std::vector<InsertStatement>::const_iterator end,
                                      bool fromMigrate) {
-   auto const css = CollectionShardingState::get(opCtx, nss);
-   const auto metadata = css->getMetadata(opCtx);
+   auto* const css = CollectionShardingState::get(opCtx, nss);
+   const auto metadata = css->getMetadataForOperation(opCtx);
    for (auto it = begin; it != end; ++it) {
        const auto& insertedDoc = it->doc;
@@ -232,8 +232,8 @@ void ShardServerOpObserver::onInserts(OperationContext* opCtx,
}
void ShardServerOpObserver::onUpdate(OperationContext* opCtx, const OplogUpdateEntryArgs& args) {
-   auto const css = CollectionShardingState::get(opCtx, args.nss);
-   const auto metadata = css->getMetadata(opCtx);
+   auto* const css = CollectionShardingState::get(opCtx, args.nss);
+   const auto metadata = css->getMetadataForOperation(opCtx);
    if (args.nss == NamespaceString::kShardConfigCollectionsNamespace) {
        // Notification of routing table changes are only needed on secondaries
diff --git a/src/mongo/db/s/split_chunk.cpp b/src/mongo/db/s/split_chunk.cpp
index f82f8f56bf2..d3c0a6908db 100644
--- a/src/mongo/db/s/split_chunk.cpp
+++ b/src/mongo/db/s/split_chunk.cpp
@@ -96,16 +96,17 @@ bool checkIfSingleDoc(OperationContext* opCtx,
 */
bool checkMetadataForSuccessfulSplitChunk(OperationContext* opCtx,
                                          const NamespaceString& nss,
+                                         const OID& epoch,
                                          const ChunkRange& chunkRange,
                                          const std::vector<BSONObj>& splitKeys) {
    const auto metadataAfterSplit = [&] {
        AutoGetCollection autoColl(opCtx, nss, MODE_IS);
-       return CollectionShardingState::get(opCtx, nss)->getMetadata(opCtx);
+       return CollectionShardingState::get(opCtx, nss)->getCurrentMetadata();
    }();
    uassert(ErrorCodes::StaleEpoch,
-           str::stream() << "Collection " << nss.ns() << " became unsharded",
-           metadataAfterSplit->isSharded());
+           str::stream() << "Collection " << nss.ns() << " changed since split start",
+           metadataAfterSplit->getCollVersion().epoch() == epoch);
    auto newChunkBounds(splitKeys);
    auto startKey = chunkRange.getMin();
@@ -208,7 +209,8 @@ StatusWith<boost::optional<ChunkRange>> splitChunk(OperationContext* opCtx,
    if (!commandStatus.isOK() || !writeConcernStatus.isOK()) {
        forceShardFilteringMetadataRefresh(opCtx, nss);
-       if (checkMetadataForSuccessfulSplitChunk(opCtx, nss, chunkRange, splitKeys)) {
+       if (checkMetadataForSuccessfulSplitChunk(
+               opCtx, nss, expectedCollectionEpoch, chunkRange, splitKeys)) {
            // Split was committed.
        } else if (!commandStatus.isOK()) {
            return commandStatus;
diff --git a/src/mongo/s/stale_exception.cpp b/src/mongo/s/stale_exception.cpp
index 67e8c7b8ba8..42fc136e1b4 100644
--- a/src/mongo/s/stale_exception.cpp
+++ b/src/mongo/s/stale_exception.cpp
@@ -44,7 +44,7 @@ MONGO_INIT_REGISTER_ERROR_EXTRA_INFO(StaleDbRoutingVersion);
boost::optional<ChunkVersion> extractOptionalVersion(const BSONObj& obj, StringData field) {
    auto swChunkVersion = ChunkVersion::parseLegacyWithField(obj, field);
    if (swChunkVersion == ErrorCodes::NoSuchKey)
-       return ChunkVersion::UNSHARDED();
+       return boost::none;
    return uassertStatusOK(std::move(swChunkVersion));
}
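A note on the pattern these hunks keep repeating: getCurrentMetadataIfKnown() yields an optional value, so each call site has to distinguish "metadata never loaded" from "loaded but unsharded" before falling back to ChunkVersion::UNSHARDED(). The standalone sketch below illustrates that three-way decision in isolation; the types are simplified stand-ins, not MongoDB's real ChunkVersion or CollectionMetadata classes, and the single level of indirection is an assumption made to keep the example compilable on its own.

// Standalone sketch of the optional-metadata fallback used at the call sites above.
// Hypothetical stand-in types; only the unknown / unsharded / sharded distinction is modelled.
#include <boost/optional.hpp>
#include <iostream>

struct ChunkVersion {
    int majorVersion;
};

struct CollectionMetadata {
    bool sharded;
    ChunkVersion shardVersion;
    bool isSharded() const { return sharded; }
    ChunkVersion getShardVersion() const { return shardVersion; }
};

// Anything not known to be sharded (never loaded, or loaded and unsharded)
// maps to the UNSHARDED version (major 0), mirroring the call sites above.
ChunkVersion shardVersionOrUnsharded(const boost::optional<CollectionMetadata>& optMetadata) {
    return (optMetadata && optMetadata->isSharded()) ? optMetadata->getShardVersion()
                                                     : ChunkVersion{0};
}

int main() {
    const boost::optional<CollectionMetadata> unknown;  // filtering metadata never loaded
    const boost::optional<CollectionMetadata> unsharded = CollectionMetadata{false, {0}};
    const boost::optional<CollectionMetadata> sharded = CollectionMetadata{true, {3}};

    std::cout << shardVersionOrUnsharded(unknown).majorVersion << "\n";    // prints 0
    std::cout << shardVersionOrUnsharded(unsharded).majorVersion << "\n";  // prints 0
    std::cout << shardVersionOrUnsharded(sharded).majorVersion << "\n";    // prints 3
    return 0;
}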