diff options
author | Kaloian Manassiev <kaloian.manassiev@mongodb.com> | 2017-12-05 13:43:52 -0500 |
---|---|---|
committer | Kaloian Manassiev <kaloian.manassiev@mongodb.com> | 2018-11-14 09:47:52 -0500 |
commit | dcf7e0dd89d34f58b592f1adb3d41e5edd6e2012 (patch) | |
tree | 36664490ee71eb77ddf183dcd5c642b3e732f179 /src/mongo/db | |
parent | c2cc425b9d2b23eead06ecbfd996375e47c81baa (diff) | |
download | mongo-dcf7e0dd89d34f58b592f1adb3d41e5edd6e2012.tar.gz |
SERVER-32198 Split CollectionShardingState::getMetadata into three methods
o getCurrentMetadataIfKnown - which returns the current filtering
metadata if any is available
o getMetadataForOperation - which returns the metadata which is required
by the current opertion, based on the OperationShardingState
o getCurrentMetadata - which returns the currently available filtering
metadata (or UNSHARDED if not known)
This is in preparation for making
getMetadataForOperation/getCurrentMetadata throw
StaleShardVersion exception if the metadata has not been loaded yet.
Diffstat (limited to 'src/mongo/db')
30 files changed, 517 insertions, 444 deletions
diff --git a/src/mongo/db/catalog/create_collection.cpp b/src/mongo/db/catalog/create_collection.cpp index b10261f7be8..e8552a11d6b 100644 --- a/src/mongo/db/catalog/create_collection.cpp +++ b/src/mongo/db/catalog/create_collection.cpp @@ -89,6 +89,14 @@ Status createCollection(OperationContext* opCtx, !options["capped"].trueValue() || options["size"].isNumber() || options.hasField("$nExtents")); + CollectionOptions collectionOptions; + { + Status status = collectionOptions.parse(options, kind); + if (!status.isOK()) { + return status; + } + } + return writeConflictRetry(opCtx, "create", nss.ns(), [&] { Lock::DBLock dbXLock(opCtx, nss.db(), MODE_X); const bool shardVersionCheck = true; @@ -99,12 +107,6 @@ Status createCollection(OperationContext* opCtx, str::stream() << "Not primary while creating collection " << nss.ns()); } - CollectionOptions collectionOptions; - Status status = collectionOptions.parse(options, kind); - if (!status.isOK()) { - return status; - } - if (collectionOptions.isView()) { // If the `system.views` collection does not exist, create it in a separate // WriteUnitOfWork. @@ -117,9 +119,8 @@ Status createCollection(OperationContext* opCtx, // Create collection. const bool createDefaultIndexes = true; - status = Database::userCreateNS( + Status status = Database::userCreateNS( opCtx, ctx.db(), nss.ns(), std::move(collectionOptions), createDefaultIndexes, idIndex); - if (!status.isOK()) { return status; } diff --git a/src/mongo/db/catalog/rename_collection.cpp b/src/mongo/db/catalog/rename_collection.cpp index 80e46f644de..b3fe22e81d8 100644 --- a/src/mongo/db/catalog/rename_collection.cpp +++ b/src/mongo/db/catalog/rename_collection.cpp @@ -175,10 +175,10 @@ Status renameCollectionCommon(OperationContext* opCtx, // Make sure the source collection is not sharded. { - auto const css = CollectionShardingState::get(opCtx, source); - if (css->getMetadata(opCtx)->isSharded()) { + auto* const css = CollectionShardingState::get(opCtx, source); + const auto metadata = css->getCurrentMetadata(); + if (metadata->isSharded()) return {ErrorCodes::IllegalOperation, "source namespace cannot be sharded"}; - } } // Disallow renaming from a replicated to an unreplicated collection or vice versa. @@ -220,10 +220,10 @@ Status renameCollectionCommon(OperationContext* opCtx, } { - auto const css = CollectionShardingState::get(opCtx, target); - if (css->getMetadata(opCtx)->isSharded()) { + auto* const css = CollectionShardingState::get(opCtx, target); + const auto metadata = css->getCurrentMetadata(); + if (metadata->isSharded()) return {ErrorCodes::IllegalOperation, "cannot rename to a sharded collection"}; - } } if (!options.dropTarget) { diff --git a/src/mongo/db/commands/SConscript b/src/mongo/db/commands/SConscript index c1113f28795..eca6f68020f 100644 --- a/src/mongo/db/commands/SConscript +++ b/src/mongo/db/commands/SConscript @@ -341,7 +341,7 @@ env.Library( '$BUILD_DIR/mongo/db/repl/oplog', '$BUILD_DIR/mongo/db/repl/repl_coordinator_interface', '$BUILD_DIR/mongo/db/rw_concern_d', - '$BUILD_DIR/mongo/db/s/sharding_catalog_manager', + '$BUILD_DIR/mongo/db/s/sharding_runtime_d', '$BUILD_DIR/mongo/idl/idl_parser', '$BUILD_DIR/mongo/s/sharding_legacy_api', '$BUILD_DIR/mongo/util/net/ssl_manager', diff --git a/src/mongo/db/commands/count_cmd.cpp b/src/mongo/db/commands/count_cmd.cpp index 60ee79461dc..729d5075cf3 100644 --- a/src/mongo/db/commands/count_cmd.cpp +++ b/src/mongo/db/commands/count_cmd.cpp @@ -153,7 +153,8 @@ public: // Prevent chunks from being cleaned up during yields - this allows us to only check the // version on initial entry into count. - auto rangePreserver = CollectionShardingState::get(opCtx, nss)->getMetadata(opCtx); + auto rangePreserver = + CollectionShardingState::get(opCtx, nss)->getMetadataForOperation(opCtx); auto statusWithPlanExecutor = getExecutorCount(opCtx, collection, request.getValue(), true /*explain*/); @@ -207,7 +208,8 @@ public: // Prevent chunks from being cleaned up during yields - this allows us to only check the // version on initial entry into count. - auto rangePreserver = CollectionShardingState::get(opCtx, nss)->getMetadata(opCtx); + auto rangePreserver = + CollectionShardingState::get(opCtx, nss)->getMetadataForOperation(opCtx); auto statusWithPlanExecutor = getExecutorCount(opCtx, collection, request.getValue(), false /*explain*/); diff --git a/src/mongo/db/commands/create_indexes.cpp b/src/mongo/db/commands/create_indexes.cpp index 4b7cbfef7dd..ead1b508e4e 100644 --- a/src/mongo/db/commands/create_indexes.cpp +++ b/src/mongo/db/commands/create_indexes.cpp @@ -235,8 +235,6 @@ std::vector<BSONObj> resolveDefaultsAndRemoveExistingIndexes(OperationContext* o return specs; } -} // namespace - /** * { createIndexes : "bar", indexes : [ { ns : "test.bar", key : { x : 1 }, name: "x_1" } ] } */ @@ -244,16 +242,17 @@ class CmdCreateIndex : public ErrmsgCommandDeprecated { public: CmdCreateIndex() : ErrmsgCommandDeprecated(kCommandName) {} - virtual bool supportsWriteConcern(const BSONObj& cmd) const override { + bool supportsWriteConcern(const BSONObj& cmd) const override { return true; } + AllowedOnSecondary secondaryAllowed(ServiceContext*) const override { return AllowedOnSecondary::kNever; } - virtual Status checkAuthForCommand(Client* client, - const std::string& dbname, - const BSONObj& cmdObj) const { + Status checkAuthForCommand(Client* client, + const std::string& dbname, + const BSONObj& cmdObj) const override { ActionSet actions; actions.addAction(ActionType::createIndex); Privilege p(parseResourcePattern(dbname, cmdObj), actions); @@ -262,26 +261,22 @@ public: return Status(ErrorCodes::Unauthorized, "Unauthorized"); } - virtual bool errmsgRun(OperationContext* opCtx, - const string& dbname, - const BSONObj& cmdObj, - string& errmsg, - BSONObjBuilder& result) { + bool errmsgRun(OperationContext* opCtx, + const string& dbname, + const BSONObj& cmdObj, + string& errmsg, + BSONObjBuilder& result) override { const NamespaceString ns(CommandHelpers::parseNsCollectionRequired(dbname, cmdObj)); + uassertStatusOK(userAllowedWriteNS(ns)); - Status status = userAllowedWriteNS(ns); - uassertStatusOK(status); - - // Disallow users from creating new indexes on config.transactions since the sessions - // code was optimized to not update indexes. + // Disallow users from creating new indexes on config.transactions since the sessions code + // was optimized to not update indexes uassert(ErrorCodes::IllegalOperation, str::stream() << "not allowed to create index on " << ns.ns(), ns != NamespaceString::kSessionTransactionsTableNamespace); - auto specsWithStatus = - parseAndValidateIndexSpecs(opCtx, ns, cmdObj, serverGlobalParams.featureCompatibility); - uassertStatusOK(specsWithStatus.getStatus()); - auto specs = std::move(specsWithStatus.getValue()); + auto specs = uassertStatusOK( + parseAndValidateIndexSpecs(opCtx, ns, cmdObj, serverGlobalParams.featureCompatibility)); // Do not use AutoGetOrCreateDb because we may relock the database in mode X. Lock::DBLock dbLock(opCtx, ns.db(), MODE_IX); @@ -336,8 +331,7 @@ public: uasserted(ErrorCodes::CommandNotSupportedOnView, errmsg); } - status = userAllowedCreateNS(ns.db(), ns.coll()); - uassertStatusOK(status); + uassertStatusOK(userAllowedCreateNS(ns.db(), ns.coll())); writeConflictRetry(opCtx, kCommandName, ns.ns(), [&] { WriteUnitOfWork wunit(opCtx); @@ -353,7 +347,6 @@ public: const boost::optional<int> dbProfilingLevel = boost::none; statsTracker.emplace(opCtx, ns, Top::LockType::WriteLocked, dbProfilingLevel); - MultiIndexBlockImpl indexer(opCtx, collection); indexer.allowBackgroundBuilding(); indexer.allowInterruption(); @@ -375,8 +368,7 @@ public: for (size_t i = 0; i < specs.size(); i++) { const BSONObj& spec = specs[i]; if (spec["unique"].trueValue()) { - status = checkUniqueIndexConstraints(opCtx, ns, spec["key"].Obj()); - uassertStatusOK(status); + _checkUniqueIndexConstraints(opCtx, ns, spec["key"].Obj()); } } @@ -442,24 +434,24 @@ public: } private: - static Status checkUniqueIndexConstraints(OperationContext* opCtx, - const NamespaceString& nss, - const BSONObj& newIdxKey) { + static void _checkUniqueIndexConstraints(OperationContext* opCtx, + const NamespaceString& nss, + const BSONObj& newIdxKey) { invariant(opCtx->lockState()->isCollectionLockedForMode(nss.ns(), MODE_X)); - auto metadata = CollectionShardingState::get(opCtx, nss)->getMetadata(opCtx); - if (metadata->isSharded()) { - ShardKeyPattern shardKeyPattern(metadata->getKeyPattern()); - if (!shardKeyPattern.isUniqueIndexCompatible(newIdxKey)) { - return Status(ErrorCodes::CannotCreateIndex, - str::stream() << "cannot create unique index over " << newIdxKey - << " with shard key pattern " - << shardKeyPattern.toBSON()); - } - } - + const auto metadata = CollectionShardingState::get(opCtx, nss)->getCurrentMetadata(); + if (!metadata->isSharded()) + return; - return Status::OK(); + const ShardKeyPattern shardKeyPattern(metadata->getKeyPattern()); + uassert(ErrorCodes::CannotCreateIndex, + str::stream() << "cannot create unique index over " << newIdxKey + << " with shard key pattern " + << shardKeyPattern.toBSON(), + shardKeyPattern.isUniqueIndexCompatible(newIdxKey)); } + } cmdCreateIndex; -} + +} // namespace +} // namespace mongo diff --git a/src/mongo/db/commands/mr.cpp b/src/mongo/db/commands/mr.cpp index 3d529332bbc..a3a94166619 100644 --- a/src/mongo/db/commands/mr.cpp +++ b/src/mongo/db/commands/mr.cpp @@ -62,7 +62,7 @@ #include "mongo/db/query/plan_summary_stats.h" #include "mongo/db/query/query_planner.h" #include "mongo/db/repl/replication_coordinator.h" -#include "mongo/db/s/collection_sharding_state.h" +#include "mongo/db/s/collection_sharding_runtime.h" #include "mongo/db/s/sharding_state.h" #include "mongo/db/server_options.h" #include "mongo/db/service_context.h" @@ -516,6 +516,9 @@ void State::prepTempCollection() { incColl->getIndexCatalog()->createIndexOnEmptyCollection(_opCtx, indexSpec), str::stream() << "createIndex failed for mr incLong ns " << _config.incLong.ns()); wuow.commit(); + + CollectionShardingRuntime::get(_opCtx, _config.incLong) + ->setFilteringMetadata(_opCtx, CollectionMetadata()); }); } @@ -566,6 +569,7 @@ void State::prepTempCollection() { CollectionOptions options = finalOptions; options.temp = true; + // If a UUID for the final output collection was sent by mongos (i.e., the final output // collection is sharded), use the UUID mongos sent when creating the temp collection. // When the temp collection is renamed to the final output collection, the UUID will be @@ -595,6 +599,9 @@ void State::prepTempCollection() { _opCtx, _config.tempNamespace, *(tempColl->uuid()), indexToInsert, false); } wuow.commit(); + + CollectionShardingRuntime::get(_opCtx, _config.tempNamespace) + ->setFilteringMetadata(_opCtx, CollectionMetadata()); }); } @@ -1408,12 +1415,9 @@ public: uassert(16149, "cannot run map reduce without the js engine", getGlobalScriptEngine()); - // Prevent sharding state from changing during the MR. - const auto collMetadata = [&] { - // Get metadata before we check our version, to make sure it doesn't increment in the - // meantime + const auto metadata = [&] { AutoGetCollectionForReadCommand autoColl(opCtx, config.nss); - return CollectionShardingState::get(opCtx, config.nss)->getMetadata(opCtx); + return CollectionShardingState::get(opCtx, config.nss)->getMetadataForOperation(opCtx); }(); bool shouldHaveData = false; @@ -1481,17 +1485,13 @@ public: const ExtensionsCallbackReal extensionsCallback(opCtx, &config.nss); const boost::intrusive_ptr<ExpressionContext> expCtx; - auto statusWithCQ = + auto cq = uassertStatusOKWithContext( CanonicalQuery::canonicalize(opCtx, std::move(qr), expCtx, extensionsCallback, - MatchExpressionParser::kAllowAllSpecialFeatures); - if (!statusWithCQ.isOK()) { - uasserted(17238, "Can't canonicalize query " + config.filter.toString()); - return 0; - } - std::unique_ptr<CanonicalQuery> cq = std::move(statusWithCQ.getValue()); + MatchExpressionParser::kAllowAllSpecialFeatures), + str::stream() << "Can't canonicalize query " << config.filter); auto exec = uassertStatusOK(getExecutor(opCtx, scopedAutoColl->getCollection(), @@ -1514,38 +1514,37 @@ public: Timer mt; - // go through each doc BSONObj o; PlanExecutor::ExecState execState; while (PlanExecutor::ADVANCED == (execState = exec->getNext(&o, NULL))) { - o = o.getOwned(); // we will be accessing outside of the lock - // check to see if this is a new object we don't own yet - // because of a chunk migration - if (collMetadata->isSharded()) { - ShardKeyPattern kp(collMetadata->getKeyPattern()); - if (!collMetadata->keyBelongsToMe(kp.extractShardKeyFromDoc(o))) { + o = o.getOwned(); // The object will be accessed outside of collection lock + + // Check to see if this is a new object we don't own yet because of a chunk + // migration + if (metadata->isSharded()) { + ShardKeyPattern kp(metadata->getKeyPattern()); + if (!metadata->keyBelongsToMe(kp.extractShardKeyFromDoc(o))) { continue; } } - // do map if (config.verbose) mt.reset(); + config.mapper->map(o); + if (config.verbose) mapTime += mt.micros(); - // Check if the state accumulated so far needs to be written to a - // collection. This may yield the DB lock temporarily and then - // acquire it again. - // + // Check if the state accumulated so far needs to be written to a collection. + // This may yield the DB lock temporarily and then acquire it again. numInputs++; if (numInputs % 100 == 0) { Timer t; - // TODO: As an optimization, we might want to do the save/restore - // state and yield inside the reduceAndSpillInMemoryState method, so - // it only happens if necessary. + // TODO: As an optimization, we might want to do the save/restore state and + // yield inside the reduceAndSpillInMemoryState method, so it only happens + // if necessary. exec->saveState(); scopedAutoColl.reset(); diff --git a/src/mongo/db/exec/update.cpp b/src/mongo/db/exec/update.cpp index d1f0a3aab77..87b79fd5b8b 100644 --- a/src/mongo/db/exec/update.cpp +++ b/src/mongo/db/exec/update.cpp @@ -134,7 +134,7 @@ bool shouldRestartUpdateIfNoLongerMatches(const UpdateStageParams& params) { const std::vector<std::unique_ptr<FieldRef>>* getImmutableFields(OperationContext* opCtx, const NamespaceString& ns) { - auto metadata = CollectionShardingState::get(opCtx, ns)->getMetadata(opCtx); + auto metadata = CollectionShardingState::get(opCtx, ns)->getMetadataForOperation(opCtx); if (metadata->isSharded()) { const std::vector<std::unique_ptr<FieldRef>>& fields = metadata->getKeyPatternFields(); // Return shard-keys as immutable for the update system. @@ -284,10 +284,10 @@ BSONObj UpdateStage::transformAndUpdate(const Snapshotted<BSONObj>& oldObj, Reco RecordId newRecordId; CollectionUpdateArgs args; if (!request->isExplain()) { - auto* css = CollectionShardingState::get(getOpCtx(), collection()->ns()); args.stmtId = request->getStmtId(); args.update = logObj; - auto metadata = css->getMetadata(getOpCtx()); + auto* const css = CollectionShardingState::get(getOpCtx(), collection()->ns()); + auto metadata = css->getMetadataForOperation(getOpCtx()); args.criteria = metadata->extractDocumentKey(newObj); uassert(16980, "Multi-update operations require all documents to have an '_id' field", diff --git a/src/mongo/db/op_observer_impl.cpp b/src/mongo/db/op_observer_impl.cpp index 995d79bfb5a..810b1e78136 100644 --- a/src/mongo/db/op_observer_impl.cpp +++ b/src/mongo/db/op_observer_impl.cpp @@ -332,7 +332,7 @@ OpTimeBundle replLogApplyOps(OperationContext* opCtx, BSONObj OpObserverImpl::getDocumentKey(OperationContext* opCtx, NamespaceString const& nss, BSONObj const& doc) { - auto metadata = CollectionShardingState::get(opCtx, nss)->getMetadata(opCtx); + auto metadata = CollectionShardingState::get(opCtx, nss)->getMetadataForOperation(opCtx); return metadata->extractDocumentKey(doc).getOwned(); } diff --git a/src/mongo/db/pipeline/pipeline_d.cpp b/src/mongo/db/pipeline/pipeline_d.cpp index 9dedae156a6..c5ea09d5033 100644 --- a/src/mongo/db/pipeline/pipeline_d.cpp +++ b/src/mongo/db/pipeline/pipeline_d.cpp @@ -125,7 +125,8 @@ StatusWith<unique_ptr<PlanExecutor, PlanExecutor::Deleter>> createRandomCursorEx if (ShardingState::get(opCtx)->needCollectionMetadata(opCtx, collection->ns().ns())) { auto shardFilterStage = stdx::make_unique<ShardFilterStage>( opCtx, - CollectionShardingState::get(opCtx, collection->ns())->getMetadata(opCtx), + CollectionShardingState::get(opCtx, collection->ns()) + ->getMetadataForOperation(opCtx), ws.get(), stage.release()); return PlanExecutor::make(opCtx, diff --git a/src/mongo/db/pipeline/process_interface_shardsvr.cpp b/src/mongo/db/pipeline/process_interface_shardsvr.cpp index b9d5fffe02b..c8c9aa66e28 100644 --- a/src/mongo/db/pipeline/process_interface_shardsvr.cpp +++ b/src/mongo/db/pipeline/process_interface_shardsvr.cpp @@ -105,22 +105,22 @@ std::pair<std::vector<FieldPath>, bool> MongoInterfaceShardServer::collectDocume return {{"_id"}, false}; } - auto scm = [opCtx, &nss]() -> ScopedCollectionMetadata { + const auto metadata = [opCtx, &nss]() { AutoGetCollection autoColl(opCtx, nss, MODE_IS); - return CollectionShardingState::get(opCtx, nss)->getMetadata(opCtx); + return CollectionShardingState::get(opCtx, nss)->getCurrentMetadata(); }(); // If the UUID is set in 'nssOrUuid', check that the UUID in the ScopedCollectionMetadata // matches. Otherwise, this implies that the collection has been dropped and recreated as // sharded. - if (!scm->isSharded() || (uuid && !scm->uuidMatches(*uuid))) { + if (!metadata->isSharded() || (uuid && !metadata->uuidMatches(*uuid))) { return {{"_id"}, false}; } // Unpack the shard key. std::vector<FieldPath> result; bool gotId = false; - for (auto& field : scm->getKeyPatternFields()) { + for (auto& field : metadata->getKeyPatternFields()) { result.emplace_back(field->dottedField()); gotId |= (result.back().fullPath() == "_id"); } diff --git a/src/mongo/db/pipeline/process_interface_standalone.cpp b/src/mongo/db/pipeline/process_interface_standalone.cpp index b3eda6e6e76..4c1dc97c2ae 100644 --- a/src/mongo/db/pipeline/process_interface_standalone.cpp +++ b/src/mongo/db/pipeline/process_interface_standalone.cpp @@ -105,8 +105,8 @@ DBClientBase* MongoInterfaceStandalone::directClient() { bool MongoInterfaceStandalone::isSharded(OperationContext* opCtx, const NamespaceString& nss) { AutoGetCollectionForRead autoColl(opCtx, nss); - auto const css = CollectionShardingState::get(opCtx, nss); - return css->getMetadata(opCtx)->isSharded(); + const auto metadata = CollectionShardingState::get(opCtx, nss)->getCurrentMetadata(); + return metadata->isSharded(); } Insert MongoInterfaceStandalone::buildInsertOp(const NamespaceString& nss, @@ -322,7 +322,7 @@ Status MongoInterfaceStandalone::attachCursorSourceToPipeline( auto css = CollectionShardingState::get(expCtx->opCtx, expCtx->ns); uassert(4567, str::stream() << "from collection (" << expCtx->ns.ns() << ") cannot be sharded", - !css->getMetadata(expCtx->opCtx)->isSharded()); + !css->getMetadataForOperation(expCtx->opCtx)->isSharded()); PipelineD::prepareCursorSource(autoColl->getCollection(), expCtx->ns, nullptr, pipeline); diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp index 72d667cfc64..e8c67924f94 100644 --- a/src/mongo/db/query/get_executor.cpp +++ b/src/mongo/db/query/get_executor.cpp @@ -225,8 +225,8 @@ void fillOutPlannerParams(OperationContext* opCtx, // If the caller wants a shard filter, make sure we're actually sharded. if (plannerParams->options & QueryPlannerParams::INCLUDE_SHARD_FILTER) { - auto collMetadata = - CollectionShardingState::get(opCtx, canonicalQuery->nss())->getMetadata(opCtx); + auto collMetadata = CollectionShardingState::get(opCtx, canonicalQuery->nss()) + ->getMetadataForOperation(opCtx); if (collMetadata->isSharded()) { plannerParams->shardKey = collMetadata->getKeyPattern(); } else { @@ -348,7 +348,8 @@ StatusWith<PrepareExecutionResult> prepareExecution(OperationContext* opCtx, if (plannerParams.options & QueryPlannerParams::INCLUDE_SHARD_FILTER) { root = make_unique<ShardFilterStage>( opCtx, - CollectionShardingState::get(opCtx, canonicalQuery->nss())->getMetadata(opCtx), + CollectionShardingState::get(opCtx, canonicalQuery->nss()) + ->getMetadataForOperation(opCtx), ws, root.release()); } diff --git a/src/mongo/db/query/stage_builder.cpp b/src/mongo/db/query/stage_builder.cpp index 370bcf0205b..ab5de811f62 100644 --- a/src/mongo/db/query/stage_builder.cpp +++ b/src/mongo/db/query/stage_builder.cpp @@ -298,11 +298,11 @@ PlanStage* buildStages(OperationContext* opCtx, if (nullptr == childStage) { return nullptr; } - return new ShardFilterStage( - opCtx, - CollectionShardingState::get(opCtx, collection->ns())->getMetadata(opCtx), - ws, - childStage); + return new ShardFilterStage(opCtx, + CollectionShardingState::get(opCtx, collection->ns()) + ->getMetadataForOperation(opCtx), + ws, + childStage); } case STAGE_DISTINCT_SCAN: { const DistinctNode* dn = static_cast<const DistinctNode*>(root); diff --git a/src/mongo/db/s/cleanup_orphaned_cmd.cpp b/src/mongo/db/s/cleanup_orphaned_cmd.cpp index 7269a5efa9b..52cfcf5166a 100644 --- a/src/mongo/db/s/cleanup_orphaned_cmd.cpp +++ b/src/mongo/db/s/cleanup_orphaned_cmd.cpp @@ -80,11 +80,16 @@ CleanupResult cleanupOrphanedData(OperationContext* opCtx, { AutoGetCollection autoColl(opCtx, ns, MODE_IX); auto* const css = CollectionShardingRuntime::get(opCtx, ns); + const auto optMetadata = css->getCurrentMetadataIfKnown(); + uassert(ErrorCodes::ConflictingOperationInProgress, + str::stream() << "Unable to establish sharding status for collection " << ns.ns(), + optMetadata); + + const auto& metadata = *optMetadata; - auto metadata = css->getMetadata(opCtx); if (!metadata->isSharded()) { - log() << "skipping orphaned data cleanup for " << ns.toString() - << ", collection is not sharded"; + LOG(0) << "skipping orphaned data cleanup for " << ns.ns() + << ", collection is not sharded"; return CleanupResult_Done; } diff --git a/src/mongo/db/s/collection_metadata_filtering_test.cpp b/src/mongo/db/s/collection_metadata_filtering_test.cpp index 7f777dfa179..46db156c36a 100644 --- a/src/mongo/db/s/collection_metadata_filtering_test.cpp +++ b/src/mongo/db/s/collection_metadata_filtering_test.cpp @@ -32,6 +32,7 @@ #include "mongo/db/catalog_raii.h" #include "mongo/db/s/collection_sharding_runtime.h" +#include "mongo/db/s/operation_sharding_state.h" #include "mongo/s/catalog/type_chunk.h" #include "mongo/s/shard_server_test_fixture.h" @@ -107,6 +108,12 @@ protected: } _manager->setFilteringMetadata(CollectionMetadata(cm, ShardId("0"))); + + auto& oss = OperationShardingState::get(operationContext()); + const auto version = cm->getVersion(ShardId("0")); + BSONObjBuilder builder; + version.appendToCommand(&builder); + oss.initializeClientRoutingVersions(kNss, builder.obj()); } std::shared_ptr<MetadataManager> _manager; @@ -134,7 +141,7 @@ TEST_F(CollectionMetadataFilteringTest, FilterDocumentsInTheFuture) { AutoGetCollection autoColl(operationContext(), kNss, MODE_IS); auto* const css = CollectionShardingState::get(operationContext(), kNss); - testFn(css->getMetadata(operationContext())); + testFn(css->getMetadataForOperation(operationContext())); } { @@ -165,7 +172,7 @@ TEST_F(CollectionMetadataFilteringTest, FilterDocumentsInThePast) { AutoGetCollection autoColl(operationContext(), kNss, MODE_IS); auto* const css = CollectionShardingState::get(operationContext(), kNss); - testFn(css->getMetadata(operationContext())); + testFn(css->getMetadataForOperation(operationContext())); } { @@ -204,7 +211,7 @@ TEST_F(CollectionMetadataFilteringTest, FilterDocumentsTooFarInThePastThrowsStal AutoGetCollection autoColl(operationContext(), kNss, MODE_IS); auto* const css = CollectionShardingState::get(operationContext(), kNss); - testFn(css->getMetadata(operationContext())); + testFn(css->getMetadataForOperation(operationContext())); } { diff --git a/src/mongo/db/s/collection_sharding_runtime.cpp b/src/mongo/db/s/collection_sharding_runtime.cpp index 4cd4be9b258..f8dc306e679 100644 --- a/src/mongo/db/s/collection_sharding_runtime.cpp +++ b/src/mongo/db/s/collection_sharding_runtime.cpp @@ -45,6 +45,30 @@ namespace { // How long to wait before starting cleanup of an emigrated chunk range MONGO_EXPORT_SERVER_PARAMETER(orphanCleanupDelaySecs, int, 900); // 900s = 15m +/** + * Returns whether the specified namespace is used for sharding-internal purposes only and can never + * be marked as anything other than UNSHARDED, because the call sites which reference these + * collections are not prepared to handle StaleConfig errors. + */ +bool isNamespaceAlwaysUnsharded(const NamespaceString& nss) { + // There should never be a case to mark as sharded collections which are on the config server + if (serverGlobalParams.clusterRole != ClusterRole::ShardServer) + return true; + + // Local and admin never have sharded collections + if (nss.db() == NamespaceString::kLocalDb || nss.db() == NamespaceString::kAdminDb) + return true; + + // Certain config collections can never be sharded + if (nss == NamespaceString::kSessionTransactionsTableNamespace) + return true; + + if (nss.isSystemDotProfile()) + return true; + + return false; +} + } // namespace CollectionShardingRuntime::CollectionShardingRuntime(ServiceContext* sc, @@ -52,7 +76,11 @@ CollectionShardingRuntime::CollectionShardingRuntime(ServiceContext* sc, executor::TaskExecutor* rangeDeleterExecutor) : CollectionShardingState(nss), _nss(std::move(nss)), - _metadataManager(std::make_shared<MetadataManager>(sc, _nss, rangeDeleterExecutor)) {} + _metadataManager(std::make_shared<MetadataManager>(sc, _nss, rangeDeleterExecutor)) { + if (isNamespaceAlwaysUnsharded(_nss)) { + _metadataManager->setFilteringMetadata(CollectionMetadata()); + } +} CollectionShardingRuntime* CollectionShardingRuntime::get(OperationContext* opCtx, const NamespaceString& nss) { @@ -62,13 +90,17 @@ CollectionShardingRuntime* CollectionShardingRuntime::get(OperationContext* opCt void CollectionShardingRuntime::setFilteringMetadata(OperationContext* opCtx, CollectionMetadata newMetadata) { + invariant(!newMetadata.isSharded() || !isNamespaceAlwaysUnsharded(_nss), + str::stream() << "Namespace " << _nss.ns() << " must never be sharded."); invariant(opCtx->lockState()->isCollectionLockedForMode(_nss.ns(), MODE_X)); _metadataManager->setFilteringMetadata(std::move(newMetadata)); } void CollectionShardingRuntime::clearFilteringMetadata() { - _metadataManager->clearFilteringMetadata(); + if (!isNamespaceAlwaysUnsharded(_nss)) { + _metadataManager->clearFilteringMetadata(); + } } auto CollectionShardingRuntime::beginReceive(ChunkRange const& range) -> CleanupNotification { diff --git a/src/mongo/db/s/collection_sharding_state.cpp b/src/mongo/db/s/collection_sharding_state.cpp index 45645383969..f65f262a029 100644 --- a/src/mongo/db/s/collection_sharding_state.cpp +++ b/src/mongo/db/s/collection_sharding_state.cpp @@ -34,6 +34,7 @@ #include "mongo/db/s/collection_sharding_state.h" +#include "mongo/db/repl/read_concern_args.h" #include "mongo/db/s/operation_sharding_state.h" #include "mongo/db/s/sharded_connection_info.h" #include "mongo/s/stale_exception.h" @@ -72,8 +73,9 @@ public: stdx::lock_guard<stdx::mutex> lg(_mutex); for (auto& coll : _collections) { - ScopedCollectionMetadata metadata = coll.second->getMetadata(opCtx); - if (metadata->isSharded()) { + const auto optMetadata = coll.second->getCurrentMetadataIfKnown(); + if (optMetadata) { + const auto& metadata = *optMetadata; versionB.appendTimestamp(coll.first, metadata->getShardVersion().toLong()); } } @@ -109,6 +111,28 @@ private: const auto kUnshardedCollection = std::make_shared<UnshardedCollection>(); +ChunkVersion getOperationReceivedVersion(OperationContext* opCtx, const NamespaceString& nss) { + auto& oss = OperationShardingState::get(opCtx); + + // If there is a version attached to the OperationContext, use it as the received version, + // otherwise get the received version from the ShardedConnectionInfo + if (oss.hasShardVersion()) { + return oss.getShardVersion(nss); + } else if (auto const info = ShardedConnectionInfo::get(opCtx->getClient(), false)) { + auto connectionShardVersion = info->getVersion(nss.ns()); + + // For backwards compatibility with map/reduce, which can access up to 2 sharded collections + // in a single call, the lack of version for a namespace on the collection must be treated + // as UNSHARDED + return connectionShardVersion.value_or(ChunkVersion::UNSHARDED()); + } else { + // There is no shard version information on either 'opCtx' or 'client'. This means that the + // operation represented by 'opCtx' is unversioned, and the shard version is always OK for + // unversioned operations. + return ChunkVersion::IGNORED(); + } +} + } // namespace CollectionShardingState::CollectionShardingState(NamespaceString nss) : _nss(std::move(nss)) {} @@ -127,37 +151,49 @@ void CollectionShardingState::report(OperationContext* opCtx, BSONObjBuilder* bu collectionsMap->report(opCtx, builder); } -ScopedCollectionMetadata CollectionShardingState::getMetadata(OperationContext* opCtx) { +ScopedCollectionMetadata CollectionShardingState::getMetadataForOperation(OperationContext* opCtx) { + const auto receivedShardVersion = getOperationReceivedVersion(opCtx, _nss); + + if (ChunkVersion::isIgnoredVersion(receivedShardVersion)) { + return {kUnshardedCollection}; + } + const auto atClusterTime = repl::ReadConcernArgs::get(opCtx).getArgsAtClusterTime(); auto optMetadata = _getMetadata(atClusterTime); + if (!optMetadata) return {kUnshardedCollection}; - return std::move(*optMetadata); + return {std::move(*optMetadata)}; } -void CollectionShardingState::checkShardVersionOrThrow(OperationContext* opCtx) { - auto& oss = OperationShardingState::get(opCtx); +ScopedCollectionMetadata CollectionShardingState::getCurrentMetadata() { + auto optMetadata = _getMetadata(boost::none); - const auto receivedShardVersion = [&] { - // If there is a version attached to the OperationContext, use it as the received version, - // otherwise get the received version from the ShardedConnectionInfo - if (oss.hasShardVersion()) { - return oss.getShardVersion(_nss); - } else if (auto const info = ShardedConnectionInfo::get(opCtx->getClient(), false)) { - auto connectionShardVersion = info->getVersion(_nss.ns()); - - // For backwards compatibility with map/reduce, which can access up to 2 sharded - // collections in a single call, the lack of version for a namespace on the collection - // must be treated as UNSHARDED - return connectionShardVersion.value_or(ChunkVersion::UNSHARDED()); - } else { - // There is no shard version information on either 'opCtx' or 'client'. This means that - // the operation represented by 'opCtx' is unversioned, and the shard version is always - // OK for unversioned operations. - return ChunkVersion::IGNORED(); - } - }(); + if (!optMetadata) + return {kUnshardedCollection}; + + return {std::move(*optMetadata)}; +} + +boost::optional<ScopedCollectionMetadata> CollectionShardingState::getCurrentMetadataIfKnown() { + return _getMetadata(boost::none); +} + +boost::optional<ChunkVersion> CollectionShardingState::getCurrentShardVersionIfKnown() { + const auto optMetadata = _getMetadata(boost::none); + if (!optMetadata) + return boost::none; + + const auto& metadata = *optMetadata; + if (!metadata->isSharded()) + return ChunkVersion::UNSHARDED(); + + return metadata->getCollVersion(); +} + +void CollectionShardingState::checkShardVersionOrThrow(OperationContext* opCtx) { + const auto receivedShardVersion = getOperationReceivedVersion(opCtx, _nss); if (ChunkVersion::isIgnoredVersion(receivedShardVersion)) { return; @@ -167,8 +203,7 @@ void CollectionShardingState::checkShardVersionOrThrow(OperationContext* opCtx) invariant(repl::ReadConcernArgs::get(opCtx).getLevel() != repl::ReadConcernLevel::kAvailableReadConcern); - // Set this for error messaging purposes before potentially returning false. - auto metadata = getMetadata(opCtx); + const auto metadata = getMetadataForOperation(opCtx); const auto wantedShardVersion = metadata->isSharded() ? metadata->getShardVersion() : ChunkVersion::UNSHARDED(); @@ -178,6 +213,7 @@ void CollectionShardingState::checkShardVersionOrThrow(OperationContext* opCtx) if (criticalSectionSignal) { // Set migration critical section on operation sharding state: operation will wait for the // migration to finish before returning failure and retrying. + auto& oss = OperationShardingState::get(opCtx); oss.setMigrationCriticalSectionSignal(criticalSectionSignal); uasserted(StaleConfigInfo(_nss, receivedShardVersion, wantedShardVersion), diff --git a/src/mongo/db/s/collection_sharding_state.h b/src/mongo/db/s/collection_sharding_state.h index 77e11534f73..e964dbced12 100644 --- a/src/mongo/db/s/collection_sharding_state.h +++ b/src/mongo/db/s/collection_sharding_state.h @@ -73,14 +73,28 @@ public: static void report(OperationContext* opCtx, BSONObjBuilder* builder); /** - * Returns the chunk filtering metadata for the collection. The returned object is safe to - * access outside of collection lock. + * Returns the chunk filtering metadata that the current operation should be using for that + * collection or otherwise throws if it has not been loaded yet. If the operation does not + * require a specific shard version, returns an UNSHARDED metadata. The returned object is safe + * to access outside of collection lock. * * If the operation context contains an 'atClusterTime' property, the returned filtering * metadata will be tied to a specific point in time. Otherwise it will reference the latest * time available. */ - ScopedCollectionMetadata getMetadata(OperationContext* opCtx); + ScopedCollectionMetadata getMetadataForOperation(OperationContext* opCtx); + ScopedCollectionMetadata getCurrentMetadata(); + + /** + * Returns boost::none if the filtering metadata for the collection is not known yet. Otherwise + * returns the most recently refreshed from the config server metadata or shard version. + * + * These methods do not check for the shard version that the operation requires and should only + * be used for cases such as checking whether a particular config server update has taken + * effect. + */ + boost::optional<ScopedCollectionMetadata> getCurrentMetadataIfKnown(); + boost::optional<ChunkVersion> getCurrentShardVersionIfKnown(); /** * Checks whether the shard version in the operation context is compatible with the shard diff --git a/src/mongo/db/s/collection_sharding_state_test.cpp b/src/mongo/db/s/collection_sharding_state_test.cpp index 5855b9ae947..2eb48e12105 100644 --- a/src/mongo/db/s/collection_sharding_state_test.cpp +++ b/src/mongo/db/s/collection_sharding_state_test.cpp @@ -33,6 +33,7 @@ #include "mongo/db/catalog_raii.h" #include "mongo/db/s/collection_sharding_runtime.h" #include "mongo/db/s/op_observer_sharding_impl.h" +#include "mongo/db/s/operation_sharding_state.h" #include "mongo/db/s/type_shard_identity.h" #include "mongo/s/shard_server_test_fixture.h" @@ -58,12 +59,25 @@ CollectionMetadata makeAMetadata(BSONObj const& keyPattern) { return CollectionMetadata(std::move(cm), ShardId("this")); } -using DeleteStateTest = ShardServerTestFixture; +class DeleteStateTest : public ShardServerTestFixture { +protected: + void setCollectionFilteringMetadata(CollectionMetadata metadata) { + AutoGetCollection autoColl(operationContext(), kTestNss, MODE_X); + auto* const css = CollectionShardingRuntime::get(operationContext(), kTestNss); + css->setFilteringMetadata(operationContext(), std::move(metadata)); + + auto& oss = OperationShardingState::get(operationContext()); + const auto version = metadata.getShardVersion(); + BSONObjBuilder builder; + version.appendToCommand(&builder); + oss.initializeClientRoutingVersions(kTestNss, builder.obj()); + } +}; TEST_F(DeleteStateTest, MakeDeleteStateUnsharded) { + setCollectionFilteringMetadata(CollectionMetadata()); + AutoGetCollection autoColl(operationContext(), kTestNss, MODE_IX); - auto* const css = CollectionShardingRuntime::get(operationContext(), kTestNss); - css->setFilteringMetadata(operationContext(), CollectionMetadata()); auto doc = BSON("key3" << "abc" @@ -82,11 +96,10 @@ TEST_F(DeleteStateTest, MakeDeleteStateUnsharded) { } TEST_F(DeleteStateTest, MakeDeleteStateShardedWithoutIdInShardKey) { - AutoGetCollection autoColl(operationContext(), kTestNss, MODE_IX); - auto* const css = CollectionShardingRuntime::get(operationContext(), kTestNss); - // Push a CollectionMetadata with a shard key not including "_id"... - css->setFilteringMetadata(operationContext(), makeAMetadata(BSON("key" << 1 << "key3" << 1))); + setCollectionFilteringMetadata(makeAMetadata(BSON("key" << 1 << "key3" << 1))); + + AutoGetCollection autoColl(operationContext(), kTestNss, MODE_IX); // The order of fields in `doc` deliberately does not match the shard key auto doc = BSON("key3" @@ -108,12 +121,10 @@ TEST_F(DeleteStateTest, MakeDeleteStateShardedWithoutIdInShardKey) { } TEST_F(DeleteStateTest, MakeDeleteStateShardedWithIdInShardKey) { - AutoGetCollection autoColl(operationContext(), kTestNss, MODE_IX); - auto* const css = CollectionShardingRuntime::get(operationContext(), kTestNss); - // Push a CollectionMetadata with a shard key that does have "_id" in the middle... - css->setFilteringMetadata(operationContext(), - makeAMetadata(BSON("key" << 1 << "_id" << 1 << "key2" << 1))); + setCollectionFilteringMetadata(makeAMetadata(BSON("key" << 1 << "_id" << 1 << "key2" << 1))); + + AutoGetCollection autoColl(operationContext(), kTestNss, MODE_IX); // The order of fields in `doc` deliberately does not match the shard key auto doc = BSON("key2" << true << "key3" @@ -133,13 +144,11 @@ TEST_F(DeleteStateTest, MakeDeleteStateShardedWithIdInShardKey) { } TEST_F(DeleteStateTest, MakeDeleteStateShardedWithIdHashInShardKey) { - AutoGetCollection autoColl(operationContext(), kTestNss, MODE_IX); - auto* const css = CollectionShardingRuntime::get(operationContext(), kTestNss); - // Push a CollectionMetadata with a shard key "_id", hashed. - auto aMetadata = makeAMetadata(BSON("_id" - << "hashed")); - css->setFilteringMetadata(operationContext(), std::move(aMetadata)); + setCollectionFilteringMetadata(makeAMetadata(BSON("_id" + << "hashed"))); + + AutoGetCollection autoColl(operationContext(), kTestNss, MODE_IX); auto doc = BSON("key2" << true << "_id" << "hello" diff --git a/src/mongo/db/s/get_shard_version_command.cpp b/src/mongo/db/s/get_shard_version_command.cpp index 2ebf5fb773e..b1f606fa999 100644 --- a/src/mongo/db/s/get_shard_version_command.cpp +++ b/src/mongo/db/s/get_shard_version_command.cpp @@ -110,27 +110,37 @@ public: AutoGetCollection autoColl(opCtx, nss, MODE_IS); auto* const css = CollectionShardingRuntime::get(opCtx, nss); - const auto metadata = css->getMetadata(opCtx); - if (metadata->isSharded()) { - result.appendTimestamp("global", metadata->getShardVersion().toLong()); + const auto optMetadata = css->getCurrentMetadataIfKnown(); + if (!optMetadata) { + result.append("global", "UNKNOWN"); + + if (cmdObj["fullMetadata"].trueValue()) { + result.append("metadata", BSONObj()); + } } else { - result.appendTimestamp("global", ChunkVersion::UNSHARDED().toLong()); - } + const auto& metadata = *optMetadata; - if (cmdObj["fullMetadata"].trueValue()) { - BSONObjBuilder metadataBuilder(result.subobjStart("metadata")); if (metadata->isSharded()) { - metadata->toBSONBasic(metadataBuilder); + result.appendTimestamp("global", metadata->getShardVersion().toLong()); + } else { + result.appendTimestamp("global", ChunkVersion::UNSHARDED().toLong()); + } + + if (cmdObj["fullMetadata"].trueValue()) { + BSONObjBuilder metadataBuilder(result.subobjStart("metadata")); + if (metadata->isSharded()) { + metadata->toBSONBasic(metadataBuilder); - BSONArrayBuilder chunksArr(metadataBuilder.subarrayStart("chunks")); - metadata->toBSONChunks(chunksArr); - chunksArr.doneFast(); + BSONArrayBuilder chunksArr(metadataBuilder.subarrayStart("chunks")); + metadata->toBSONChunks(chunksArr); + chunksArr.doneFast(); - BSONArrayBuilder pendingArr(metadataBuilder.subarrayStart("pending")); - css->toBSONPending(pendingArr); - pendingArr.doneFast(); + BSONArrayBuilder pendingArr(metadataBuilder.subarrayStart("pending")); + css->toBSONPending(pendingArr); + pendingArr.doneFast(); + } + metadataBuilder.doneFast(); } - metadataBuilder.doneFast(); } return true; diff --git a/src/mongo/db/s/merge_chunks_command.cpp b/src/mongo/db/s/merge_chunks_command.cpp index a4d42284520..37cee07f990 100644 --- a/src/mongo/db/s/merge_chunks_command.cpp +++ b/src/mongo/db/s/merge_chunks_command.cpp @@ -51,55 +51,46 @@ #include "mongo/util/mongoutils/str.h" namespace mongo { - -using std::string; -using std::vector; -using str::stream; - namespace { bool checkMetadataForSuccess(OperationContext* opCtx, const NamespaceString& nss, - const BSONObj& minKey, - const BSONObj& maxKey) { + const OID& epoch, + const ChunkRange& chunkRange) { const auto metadataAfterMerge = [&] { AutoGetCollection autoColl(opCtx, nss, MODE_IS); - return CollectionShardingState::get(opCtx, nss)->getMetadata(opCtx); + return CollectionShardingState::get(opCtx, nss)->getCurrentMetadata(); }(); uassert(ErrorCodes::StaleEpoch, - str::stream() << "Collection " << nss.ns() << " became unsharded", - metadataAfterMerge->isSharded()); + str::stream() << "Collection " << nss.ns() << " changed since merge start", + metadataAfterMerge->getCollVersion().epoch() == epoch); ChunkType chunk; - if (!metadataAfterMerge->getNextChunk(minKey, &chunk)) { + if (!metadataAfterMerge->getNextChunk(chunkRange.getMin(), &chunk)) { return false; } - return chunk.getMin().woCompare(minKey) == 0 && chunk.getMax().woCompare(maxKey) == 0; + return chunk.getMin().woCompare(chunkRange.getMin()) == 0 && + chunk.getMax().woCompare(chunkRange.getMax()) == 0; } -Status mergeChunks(OperationContext* opCtx, - const NamespaceString& nss, - const BSONObj& minKey, - const BSONObj& maxKey, - const OID& epoch) { - // Get the distributed lock - // TODO(SERVER-25086): Remove distLock acquisition from merge chunk - const string whyMessage = stream() << "merging chunks in " << nss.ns() << " from " << minKey - << " to " << maxKey; - - auto scopedDistLock = Grid::get(opCtx)->catalogClient()->getDistLockManager()->lock( - opCtx, nss.ns(), whyMessage, DistLockManager::kSingleLockAttemptTimeout); - - if (!scopedDistLock.isOK()) { - std::string context = stream() << "could not acquire collection lock for " << nss.ns() - << " to merge chunks in [" << redact(minKey) << ", " - << redact(maxKey) << ")"; - - warning() << context << causedBy(scopedDistLock.getStatus()); - return scopedDistLock.getStatus().withContext(context); - } +void mergeChunks(OperationContext* opCtx, + const NamespaceString& nss, + const BSONObj& minKey, + const BSONObj& maxKey, + const OID& epoch) { + const std::string whyMessage = str::stream() << "merging chunks in " << nss.ns() << " from " + << minKey << " to " << maxKey; + auto scopedDistLock = uassertStatusOKWithContext( + Grid::get(opCtx)->catalogClient()->getDistLockManager()->lock( + opCtx, nss.ns(), whyMessage, DistLockManager::kSingleLockAttemptTimeout), + str::stream() << "could not acquire collection lock for " << nss.ns() + << " to merge chunks in [" + << redact(minKey) + << ", " + << redact(maxKey) + << ")"); auto const shardingState = ShardingState::get(opCtx); @@ -109,39 +100,32 @@ Status mergeChunks(OperationContext* opCtx, const auto metadata = [&] { AutoGetCollection autoColl(opCtx, nss, MODE_IS); - return CollectionShardingState::get(opCtx, nss)->getMetadata(opCtx); + return CollectionShardingState::get(opCtx, nss)->getCurrentMetadata(); }(); - if (!metadata->isSharded()) { - std::string errmsg = stream() << "could not merge chunks, collection " << nss.ns() - << " is not sharded"; - - warning() << errmsg; - return {ErrorCodes::StaleEpoch, errmsg}; - } + uassert(ErrorCodes::StaleEpoch, + str::stream() << "Collection " << nss.ns() << " became unsharded", + metadata->isSharded()); const auto shardVersion = metadata->getShardVersion(); - - if (epoch.isSet() && shardVersion.epoch() != epoch) { - std::string errmsg = stream() - << "could not merge chunks, collection " << nss.ns() - << " has changed since merge was sent (sent epoch: " << epoch.toString() - << ", current epoch: " << shardVersion.epoch() << ")"; - - warning() << errmsg; - return {ErrorCodes::StaleEpoch, errmsg}; - } - - if (!metadata->isValidKey(minKey) || !metadata->isValidKey(maxKey)) { - std::string errmsg = stream() << "could not merge chunks, the range " - << redact(ChunkRange(minKey, maxKey).toString()) - << " is not valid" - << " for collection " << nss.ns() << " with key pattern " - << metadata->getKeyPattern().toString(); - - warning() << errmsg; - return Status(ErrorCodes::IllegalOperation, errmsg); - } + uassert(ErrorCodes::StaleEpoch, + str::stream() << "could not merge chunks, collection " << nss.ns() + << " has changed since merge was sent (sent epoch: " + << epoch.toString() + << ", current epoch: " + << shardVersion.epoch() + << ")", + shardVersion.epoch() == epoch); + + uassert(ErrorCodes::IllegalOperation, + str::stream() << "could not merge chunks, the range " + << redact(ChunkRange(minKey, maxKey).toString()) + << " is not valid" + << " for collection " + << nss.ns() + << " with key pattern " + << metadata->getKeyPattern().toString(), + metadata->isValidKey(minKey) && metadata->isValidKey(maxKey)); // // Get merged chunk information @@ -160,15 +144,15 @@ Status mergeChunks(OperationContext* opCtx, chunksToMerge.push_back(itChunk); } - if (chunksToMerge.empty()) { - std::string errmsg = stream() - << "could not merge chunks, collection " << nss.ns() << " range starting at " - << redact(minKey) << " and ending at " << redact(maxKey) << " does not belong to shard " - << shardingState->shardId(); - - warning() << errmsg; - return Status(ErrorCodes::IllegalOperation, errmsg); - } + uassert(ErrorCodes::IllegalOperation, + str::stream() << "could not merge chunks, collection " << nss.ns() + << " range starting at " + << redact(minKey) + << " and ending at " + << redact(maxKey) + << " does not belong to shard " + << shardingState->shardId(), + !chunksToMerge.empty()); // // Validate the range starts and ends at chunks and has no holes, error if not valid @@ -179,65 +163,56 @@ Status mergeChunks(OperationContext* opCtx, // minKey is inclusive bool minKeyInRange = rangeContains(firstDocMin, firstDocMax, minKey); - if (!minKeyInRange) { - std::string errmsg = stream() << "could not merge chunks, collection " << nss.ns() - << " range starting at " << redact(minKey) - << " does not belong to shard " << shardingState->shardId(); - - warning() << errmsg; - return Status(ErrorCodes::IllegalOperation, errmsg); - } + uassert(ErrorCodes::IllegalOperation, + str::stream() << "could not merge chunks, collection " << nss.ns() + << " range starting at " + << redact(minKey) + << " does not belong to shard " + << shardingState->shardId(), + minKeyInRange); BSONObj lastDocMin = chunksToMerge.back().getMin(); BSONObj lastDocMax = chunksToMerge.back().getMax(); // maxKey is exclusive bool maxKeyInRange = lastDocMin.woCompare(maxKey) < 0 && lastDocMax.woCompare(maxKey) >= 0; - if (!maxKeyInRange) { - std::string errmsg = stream() << "could not merge chunks, collection " << nss.ns() - << " range ending at " << redact(maxKey) - << " does not belong to shard " << shardingState->shardId(); - - warning() << errmsg; - return Status(ErrorCodes::IllegalOperation, errmsg); - } + uassert(ErrorCodes::IllegalOperation, + str::stream() << "could not merge chunks, collection " << nss.ns() + << " range ending at " + << redact(maxKey) + << " does not belong to shard " + << shardingState->shardId(), + maxKeyInRange); bool validRangeStartKey = firstDocMin.woCompare(minKey) == 0; bool validRangeEndKey = lastDocMax.woCompare(maxKey) == 0; - if (!validRangeStartKey || !validRangeEndKey) { - std::string errmsg = stream() - << "could not merge chunks, collection " << nss.ns() << " does not contain a chunk " - << (!validRangeStartKey ? "starting at " + redact(minKey.toString()) : "") - << (!validRangeStartKey && !validRangeEndKey ? " or " : "") - << (!validRangeEndKey ? "ending at " + redact(maxKey.toString()) : ""); - - warning() << errmsg; - return Status(ErrorCodes::IllegalOperation, errmsg); - } - - if (chunksToMerge.size() == 1) { - std::string errmsg = stream() << "could not merge chunks, collection " << nss.ns() - << " already contains chunk for " - << redact(ChunkRange(minKey, maxKey).toString()); - - warning() << errmsg; - return Status(ErrorCodes::IllegalOperation, errmsg); - } + uassert(ErrorCodes::IllegalOperation, + str::stream() << "could not merge chunks, collection " << nss.ns() + << " does not contain a chunk " + << (!validRangeStartKey ? "starting at " + redact(minKey.toString()) : "") + << (!validRangeStartKey && !validRangeEndKey ? " or " : "") + << (!validRangeEndKey ? "ending at " + redact(maxKey.toString()) : ""), + validRangeStartKey && validRangeEndKey); + uassert(ErrorCodes::IllegalOperation, + str::stream() << "could not merge chunks, collection " << nss.ns() + << " already contains chunk for " + << ChunkRange(minKey, maxKey).toString(), + chunksToMerge.size() > 1); // Look for hole in range for (size_t i = 1; i < chunksToMerge.size(); ++i) { - if (chunksToMerge[i - 1].getMax().woCompare(chunksToMerge[i].getMin()) != 0) { - std::string errmsg = stream() - << "could not merge chunks, collection " << nss.ns() << " has a hole in the range " - << redact(ChunkRange(minKey, maxKey).toString()) << " at " - << redact(ChunkRange(chunksToMerge[i - 1].getMax(), chunksToMerge[i].getMin()) - .toString()); - - warning() << errmsg; - return Status(ErrorCodes::IllegalOperation, errmsg); - } + uassert( + ErrorCodes::IllegalOperation, + str::stream() + << "could not merge chunks, collection " + << nss.ns() + << " has a hole in the range " + << ChunkRange(minKey, maxKey).toString() + << " at " + << ChunkRange(chunksToMerge[i - 1].getMax(), chunksToMerge[i].getMin()).toString(), + chunksToMerge[i - 1].getMax().woCompare(chunksToMerge[i].getMin()) == 0); } // @@ -251,42 +226,33 @@ Status mergeChunks(OperationContext* opCtx, auto configCmdObj = request.toConfigCommandBSON(ShardingCatalogClient::kMajorityWriteConcern.toBSON()); - auto cmdResponseStatus = Grid::get(opCtx)->shardRegistry()->getConfigShard()->runCommand( - opCtx, - ReadPreferenceSetting{ReadPreference::PrimaryOnly}, - "admin", - configCmdObj, - Shard::RetryPolicy::kIdempotent); + auto cmdResponse = + uassertStatusOK(Grid::get(opCtx)->shardRegistry()->getConfigShard()->runCommand( + opCtx, + ReadPreferenceSetting{ReadPreference::PrimaryOnly}, + "admin", + configCmdObj, + Shard::RetryPolicy::kIdempotent)); // Refresh metadata to pick up new chunk definitions (regardless of the results returned from // running _configsvrCommitChunkMerge). forceShardFilteringMetadataRefresh(opCtx, nss); - // If we failed to get any response from the config server at all, despite retries, then we - // should just go ahead and fail the whole operation. - if (!cmdResponseStatus.isOK()) { - return cmdResponseStatus.getStatus(); - } - // If _configsvrCommitChunkMerge returned an error, look at this shard's metadata to determine // if the merge actually did happen. This can happen if there's a network error getting the // response from the first call to _configsvrCommitChunkMerge, but it actually succeeds, thus // the automatic retry fails with a precondition violation, for example. - auto commandStatus = std::move(cmdResponseStatus.getValue().commandStatus); - auto writeConcernStatus = std::move(cmdResponseStatus.getValue().writeConcernStatus); + auto commandStatus = std::move(cmdResponse.commandStatus); + auto writeConcernStatus = std::move(cmdResponse.writeConcernStatus); if ((!commandStatus.isOK() || !writeConcernStatus.isOK()) && - checkMetadataForSuccess(opCtx, nss, minKey, maxKey)) { - + checkMetadataForSuccess(opCtx, nss, epoch, ChunkRange(minKey, maxKey))) { LOG(1) << "mergeChunk [" << redact(minKey) << "," << redact(maxKey) << ") has already been committed."; - } else if (!commandStatus.isOK()) { - return commandStatus.withContext("Failed to commit chunk merge"); - } else if (!writeConcernStatus.isOK()) { - return writeConcernStatus.withContext("Failed to commit chunk merge"); } - return Status::OK(); + uassertStatusOKWithContext(commandStatus, "Failed to commit chunk merge"); + uassertStatusOKWithContext(writeConcernStatus, "Failed to commit chunk merge"); } class MergeChunksCommand : public ErrmsgCommandDeprecated { @@ -325,22 +291,22 @@ public: } // Required - static BSONField<string> nsField; - static BSONField<vector<BSONObj>> boundsField; + static BSONField<std::string> nsField; + static BSONField<std::vector<BSONObj>> boundsField; // Optional, if the merge is only valid for a particular epoch static BSONField<OID> epochField; bool errmsgRun(OperationContext* opCtx, - const string& dbname, + const std::string& dbname, const BSONObj& cmdObj, - string& errmsg, + std::string& errmsg, BSONObjBuilder& result) override { uassertStatusOK(ShardingState::get(opCtx)->canAcceptShardedCommands()); const NamespaceString nss(parseNs(dbname, cmdObj)); - vector<BSONObj> bounds; + std::vector<BSONObj> bounds; if (!FieldParser::extract(cmdObj, boundsField, &bounds, &errmsg)) { return false; } @@ -374,15 +340,14 @@ public: return false; } - auto mergeStatus = mergeChunks(opCtx, nss, minKey, maxKey, epoch); - uassertStatusOK(mergeStatus); + mergeChunks(opCtx, nss, minKey, maxKey, epoch); return true; } } mergeChunksCmd; -BSONField<string> MergeChunksCommand::nsField("mergeChunks"); -BSONField<vector<BSONObj>> MergeChunksCommand::boundsField("bounds"); +BSONField<std::string> MergeChunksCommand::nsField("mergeChunks"); +BSONField<std::vector<BSONObj>> MergeChunksCommand::boundsField("bounds"); BSONField<OID> MergeChunksCommand::epochField("epoch"); } // namespace diff --git a/src/mongo/db/s/migration_destination_manager.cpp b/src/mongo/db/s/migration_destination_manager.cpp index 4a3071ad707..4e778a5cc02 100644 --- a/src/mongo/db/s/migration_destination_manager.cpp +++ b/src/mongo/db/s/migration_destination_manager.cpp @@ -1119,14 +1119,14 @@ CollectionShardingRuntime::CleanupNotification MigrationDestinationManager::_not AutoGetCollection autoColl(opCtx, _nss, MODE_IX, MODE_X); auto* const css = CollectionShardingRuntime::get(opCtx, _nss); - - auto metadata = css->getMetadata(opCtx); + const auto optMetadata = css->getCurrentMetadataIfKnown(); // This can currently happen because drops aren't synchronized with in-migrations. The idea for // checking this here is that in the future we shouldn't have this problem. - if (!metadata->isSharded() || metadata->getCollVersion().epoch() != _epoch) { + if (!optMetadata || !(*optMetadata)->isSharded() || + (*optMetadata)->getCollVersion().epoch() != _epoch) { return Status{ErrorCodes::StaleShardVersion, - str::stream() << "not noting chunk " << redact(range.toString()) + str::stream() << "Not marking chunk " << redact(range.toString()) << " as pending because the epoch of " << _nss.ns() << " changed"}; @@ -1150,14 +1150,14 @@ void MigrationDestinationManager::_forgetPending(OperationContext* opCtx, ChunkR UninterruptibleLockGuard noInterrupt(opCtx->lockState()); AutoGetCollection autoColl(opCtx, _nss, MODE_IX, MODE_X); auto* const css = CollectionShardingRuntime::get(opCtx, _nss); - - auto metadata = css->getMetadata(opCtx); + const auto optMetadata = css->getCurrentMetadataIfKnown(); // This can currently happen because drops aren't synchronized with in-migrations. The idea for // checking this here is that in the future we shouldn't have this problem. - if (!metadata->isSharded() || metadata->getCollVersion().epoch() != _epoch) { - log() << "no need to forget pending chunk " << redact(range.toString()) - << " because the epoch for " << _nss.ns() << " changed"; + if (!optMetadata || !(*optMetadata)->isSharded() || + (*optMetadata)->getCollVersion().epoch() != _epoch) { + LOG(0) << "No need to forget pending chunk " << redact(range.toString()) + << " because the epoch for " << _nss.ns() << " changed"; return; } diff --git a/src/mongo/db/s/migration_source_manager.cpp b/src/mongo/db/s/migration_source_manager.cpp index d30c52ac2b2..328d9df662a 100644 --- a/src/mongo/db/s/migration_source_manager.cpp +++ b/src/mongo/db/s/migration_source_manager.cpp @@ -113,19 +113,6 @@ void refreshRecipientRoutingTable(OperationContext* opCtx, executor->scheduleRemoteCommand(request, noOp).getStatus().ignore(); } -Status checkCollectionEpochMatches(const ScopedCollectionMetadata& metadata, OID expectedEpoch) { - if (metadata->isSharded() && metadata->getCollVersion().epoch() == expectedEpoch) - return Status::OK(); - - return {ErrorCodes::IncompatibleShardingMetadata, - str::stream() << "The collection was dropped or recreated since the migration began. " - << "Expected collection epoch: " - << expectedEpoch.toString() - << ", but found: " - << (metadata->isSharded() ? metadata->getCollVersion().epoch().toString() - : "unsharded collection.")}; -} - } // namespace MONGO_FAIL_POINT_DEFINE(doNotRefreshRecipientAfterCommit); @@ -171,9 +158,15 @@ MigrationSourceManager::MigrationSourceManager(OperationContext* opCtx, collectionUUID = autoColl.getCollection()->uuid().value(); } - auto metadata = CollectionShardingState::get(opCtx, getNss())->getMetadata(opCtx); + auto optMetadata = + CollectionShardingState::get(opCtx, getNss())->getCurrentMetadataIfKnown(); + uassert(ErrorCodes::ConflictingOperationInProgress, + "The collection's sharding state was cleared by a concurrent operation", + optMetadata); + + auto& metadata = *optMetadata; uassert(ErrorCodes::IncompatibleShardingMetadata, - str::stream() << "cannot move chunks for an unsharded collection", + "Cannot move chunks for an unsharded collection", metadata->isSharded()); return std::make_tuple(std::move(metadata), std::move(collectionUUID)); @@ -243,14 +236,7 @@ Status MigrationSourceManager::startClone(OperationContext* opCtx) { { // Register for notifications from the replication subsystem - UninterruptibleLockGuard noInterrupt(opCtx->lockState()); - AutoGetCollection autoColl(opCtx, getNss(), MODE_IX, MODE_X); - auto* const css = CollectionShardingRuntime::get(opCtx, getNss()); - - const auto metadata = css->getMetadata(opCtx); - Status status = checkCollectionEpochMatches(metadata, _collectionEpoch); - if (!status.isOK()) - return status; + const auto metadata = _getCurrentMetadataAndCheckEpoch(opCtx); // Having the metadata manager registered on the collection sharding state is what indicates // that a chunk on that collection is being migrated. With an active migration, write @@ -259,6 +245,9 @@ Status MigrationSourceManager::startClone(OperationContext* opCtx) { _cloneDriver = stdx::make_unique<MigrationChunkClonerSourceLegacy>( _args, metadata->getKeyPattern(), _donorConnStr, _recipientHost); + UninterruptibleLockGuard noInterrupt(opCtx->lockState()); + AutoGetCollection autoColl(opCtx, getNss(), MODE_IX, MODE_X); + auto* const css = CollectionShardingRuntime::get(opCtx, getNss()); invariant(nullptr == std::exchange(msmForCsr(css), this)); _state = kCloning; } @@ -298,19 +287,7 @@ Status MigrationSourceManager::enterCriticalSection(OperationContext* opCtx) { _stats.totalDonorChunkCloneTimeMillis.addAndFetch(_cloneAndCommitTimer.millis()); _cloneAndCommitTimer.reset(); - { - const auto metadata = [&] { - UninterruptibleLockGuard noInterrupt(opCtx->lockState()); - AutoGetCollection autoColl(opCtx, _args.getNss(), MODE_IS); - return CollectionShardingState::get(opCtx, _args.getNss())->getMetadata(opCtx); - }(); - - Status status = checkCollectionEpochMatches(metadata, _collectionEpoch); - if (!status.isOK()) - return status; - - _notifyChangeStreamsOnRecipientFirstChunk(opCtx, metadata); - } + _notifyChangeStreamsOnRecipientFirstChunk(opCtx, _getCurrentMetadataAndCheckEpoch(opCtx)); // Mark the shard as running critical operation, which requires recovery on crash. // @@ -386,15 +363,7 @@ Status MigrationSourceManager::commitChunkMetadataOnConfig(OperationContext* opC BSONObjBuilder builder; { - const auto metadata = [&] { - UninterruptibleLockGuard noInterrupt(opCtx->lockState()); - AutoGetCollection autoColl(opCtx, _args.getNss(), MODE_IS); - return CollectionShardingState::get(opCtx, _args.getNss())->getMetadata(opCtx); - }(); - - Status status = checkCollectionEpochMatches(metadata, _collectionEpoch); - if (!status.isOK()) - return status; + const auto metadata = _getCurrentMetadataAndCheckEpoch(opCtx); ChunkType migratedChunkType; migratedChunkType.setMin(_args.getMinKey()); @@ -528,18 +497,7 @@ Status MigrationSourceManager::commitChunkMetadataOnConfig(OperationContext* opC << "' after commit failed"); } - auto refreshedMetadata = [&] { - UninterruptibleLockGuard noInterrupt(opCtx->lockState()); - AutoGetCollection autoColl(opCtx, getNss(), MODE_IS); - return CollectionShardingState::get(opCtx, getNss())->getMetadata(opCtx); - }(); - - if (!refreshedMetadata->isSharded()) { - return {ErrorCodes::NamespaceNotSharded, - str::stream() << "Chunk move failed because collection '" << getNss().ns() - << "' is no longer sharded. The migration commit error was: " - << migrationCommitStatus.toString()}; - } + const auto refreshedMetadata = _getCurrentMetadataAndCheckEpoch(opCtx); if (refreshedMetadata->keyBelongsToMe(_args.getMinKey())) { // This condition may only happen if the migration commit has failed for any reason @@ -562,8 +520,8 @@ Status MigrationSourceManager::commitChunkMetadataOnConfig(OperationContext* opC } // Migration succeeded - log() << "Migration succeeded and updated collection version to " - << refreshedMetadata->getCollVersion(); + LOG(0) << "Migration succeeded and updated collection version to " + << refreshedMetadata->getCollVersion(); MONGO_FAIL_POINT_PAUSE_WHILE_SET(hangBeforeLeavingCriticalSection); @@ -656,6 +614,32 @@ void MigrationSourceManager::cleanupOnError(OperationContext* opCtx) { } } +ScopedCollectionMetadata MigrationSourceManager::_getCurrentMetadataAndCheckEpoch( + OperationContext* opCtx) { + auto metadata = [&] { + UninterruptibleLockGuard noInterrupt(opCtx->lockState()); + AutoGetCollection autoColl(opCtx, getNss(), MODE_IS); + auto* const css = CollectionShardingRuntime::get(opCtx, getNss()); + + const auto optMetadata = css->getCurrentMetadataIfKnown(); + uassert(ErrorCodes::ConflictingOperationInProgress, + "The collection's sharding state was cleared by a concurrent operation", + optMetadata); + return *optMetadata; + }(); + + uassert(ErrorCodes::ConflictingOperationInProgress, + str::stream() << "The collection was dropped or recreated since the migration began. " + << "Expected collection epoch: " + << _collectionEpoch.toString() + << ", but found: " + << (metadata->isSharded() ? metadata->getCollVersion().epoch().toString() + : "unsharded collection."), + metadata->isSharded() && metadata->getCollVersion().epoch() == _collectionEpoch); + + return metadata; +} + void MigrationSourceManager::_notifyChangeStreamsOnRecipientFirstChunk( OperationContext* opCtx, const ScopedCollectionMetadata& metadata) { // If this is not the first donation, there is nothing to be done diff --git a/src/mongo/db/s/migration_source_manager.h b/src/mongo/db/s/migration_source_manager.h index c053b73736e..eaaacdf0590 100644 --- a/src/mongo/db/s/migration_source_manager.h +++ b/src/mongo/db/s/migration_source_manager.h @@ -185,6 +185,8 @@ private: // comments explaining the various state transitions. enum State { kCreated, kCloning, kCloneCaughtUp, kCriticalSection, kCloneCompleted, kDone }; + ScopedCollectionMetadata _getCurrentMetadataAndCheckEpoch(OperationContext* opCtx); + /** * If this donation moves the first chunk to the recipient (i.e., the recipient didn't have any * chunks), this function writes a no-op message to the oplog, so that change stream will notice diff --git a/src/mongo/db/s/op_observer_sharding_impl.cpp b/src/mongo/db/s/op_observer_sharding_impl.cpp index fcae9cf4e72..8c990377d17 100644 --- a/src/mongo/db/s/op_observer_sharding_impl.cpp +++ b/src/mongo/db/s/op_observer_sharding_impl.cpp @@ -42,7 +42,7 @@ const auto getIsMigrating = OperationContext::declareDecoration<bool>(); void assertIntersectingChunkHasNotMoved(OperationContext* opCtx, CollectionShardingRuntime* csr, const BSONObj& doc) { - auto metadata = csr->getMetadata(opCtx); + auto metadata = csr->getMetadataForOperation(opCtx); if (!metadata->isSharded()) { return; } diff --git a/src/mongo/db/s/set_shard_version_command.cpp b/src/mongo/db/s/set_shard_version_command.cpp index b12aeea4435..12a8d0ef469 100644 --- a/src/mongo/db/s/set_shard_version_command.cpp +++ b/src/mongo/db/s/set_shard_version_command.cpp @@ -231,11 +231,12 @@ public: boost::optional<Lock::CollectionLock> collLock; collLock.emplace(opCtx->lockState(), nss.ns(), MODE_IS); - auto const css = CollectionShardingState::get(opCtx, nss); + auto* const css = CollectionShardingState::get(opCtx, nss); const ChunkVersion collectionShardVersion = [&] { - auto metadata = css->getMetadata(opCtx); - return metadata->isSharded() ? metadata->getShardVersion() - : ChunkVersion::UNSHARDED(); + auto optMetadata = css->getCurrentMetadataIfKnown(); + return (optMetadata && (*optMetadata)->isSharded()) + ? (*optMetadata)->getShardVersion() + : ChunkVersion::UNSHARDED(); }(); if (requestedVersion.isWriteCompatibleWith(collectionShardVersion)) { @@ -349,11 +350,13 @@ public: { AutoGetCollection autoColl(opCtx, nss, MODE_IS); - ChunkVersion currVersion = ChunkVersion::UNSHARDED(); - auto metadata = CollectionShardingState::get(opCtx, nss)->getMetadata(opCtx); - if (metadata->isSharded()) { - currVersion = metadata->getShardVersion(); - } + const ChunkVersion currVersion = [&] { + auto* const css = CollectionShardingState::get(opCtx, nss); + auto optMetadata = css->getCurrentMetadataIfKnown(); + return (optMetadata && (*optMetadata)->isSharded()) + ? (*optMetadata)->getShardVersion() + : ChunkVersion::UNSHARDED(); + }(); if (!status.isOK()) { // The reload itself was interrupted or confused here diff --git a/src/mongo/db/s/shard_filtering_metadata_refresh.cpp b/src/mongo/db/s/shard_filtering_metadata_refresh.cpp index 3d58836fb8b..81336bd386b 100644 --- a/src/mongo/db/s/shard_filtering_metadata_refresh.cpp +++ b/src/mongo/db/s/shard_filtering_metadata_refresh.cpp @@ -76,19 +76,16 @@ void onShardVersionMismatch(OperationContext* opCtx, const auto currentShardVersion = [&] { AutoGetCollection autoColl(opCtx, nss, MODE_IS); - const auto currentMetadata = CollectionShardingState::get(opCtx, nss)->getMetadata(opCtx); - if (currentMetadata->isSharded()) { - return currentMetadata->getShardVersion(); - } - - return ChunkVersion::UNSHARDED(); + return CollectionShardingState::get(opCtx, nss)->getCurrentShardVersionIfKnown(); }(); - if (currentShardVersion.epoch() == shardVersionReceived.epoch() && - currentShardVersion.majorVersion() >= shardVersionReceived.majorVersion()) { - // Don't need to remotely reload if we're in the same epoch and the requested version is - // smaller than the one we know about. This means that the remote side is behind. - return; + if (currentShardVersion) { + if (currentShardVersion->epoch() == shardVersionReceived.epoch() && + currentShardVersion->majorVersion() >= shardVersionReceived.majorVersion()) { + // Don't need to remotely reload if we're in the same epoch and the requested version is + // smaller than the one we know about. This means that the remote side is behind. + return; + } } if (MONGO_FAIL_POINT(skipShardFilteringMetadataRefresh)) { @@ -146,58 +143,69 @@ ChunkVersion forceShardFilteringMetadataRefresh(OperationContext* opCtx, invariant(!opCtx->lockState()->isLocked()); invariant(!opCtx->getClient()->isInDirectClient()); - auto const shardingState = ShardingState::get(opCtx); + auto* const shardingState = ShardingState::get(opCtx); invariant(shardingState->canAcceptShardedCommands()); - const auto routingInfo = + auto routingInfo = uassertStatusOK(Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfoWithRefresh( opCtx, nss, forceRefreshFromThisThread)); - const auto cm = routingInfo.cm(); + auto cm = routingInfo.cm(); if (!cm) { // No chunk manager, so unsharded. // Exclusive collection lock needed since we're now changing the metadata AutoGetCollection autoColl(opCtx, nss, MODE_IX, MODE_X); - - auto* const css = CollectionShardingRuntime::get(opCtx, nss); - css->setFilteringMetadata(opCtx, CollectionMetadata()); + CollectionShardingRuntime::get(opCtx, nss) + ->setFilteringMetadata(opCtx, CollectionMetadata()); return ChunkVersion::UNSHARDED(); } + // Optimistic check with only IS lock in order to avoid threads piling up on the collection X + // lock below { AutoGetCollection autoColl(opCtx, nss, MODE_IS); - auto metadata = CollectionShardingState::get(opCtx, nss)->getMetadata(opCtx); + auto optMetadata = CollectionShardingState::get(opCtx, nss)->getCurrentMetadataIfKnown(); // We already have newer version - if (metadata->isSharded() && - metadata->getCollVersion().epoch() == cm->getVersion().epoch() && - metadata->getCollVersion() >= cm->getVersion()) { - LOG(1) << "Skipping refresh of metadata for " << nss << " " - << metadata->getCollVersion() << " with an older " << cm->getVersion(); - return metadata->getShardVersion(); + if (optMetadata) { + const auto& metadata = *optMetadata; + if (metadata->isSharded() && + metadata->getCollVersion().epoch() == cm->getVersion().epoch() && + metadata->getCollVersion() >= cm->getVersion()) { + LOG(1) << "Skipping refresh of metadata for " << nss << " " + << metadata->getCollVersion() << " with an older " << cm->getVersion(); + return metadata->getShardVersion(); + } } } // Exclusive collection lock needed since we're now changing the metadata AutoGetCollection autoColl(opCtx, nss, MODE_IX, MODE_X); - auto* const css = CollectionShardingRuntime::get(opCtx, nss); - auto metadata = css->getMetadata(opCtx); + { + auto optMetadata = CollectionShardingState::get(opCtx, nss)->getCurrentMetadataIfKnown(); - // We already have newer version - if (metadata->isSharded() && metadata->getCollVersion().epoch() == cm->getVersion().epoch() && - metadata->getCollVersion() >= cm->getVersion()) { - LOG(1) << "Skipping refresh of metadata for " << nss << " " << metadata->getCollVersion() - << " with an older " << cm->getVersion(); - return metadata->getShardVersion(); + // We already have newer version + if (optMetadata) { + const auto& metadata = *optMetadata; + if (metadata->isSharded() && + metadata->getCollVersion().epoch() == cm->getVersion().epoch() && + metadata->getCollVersion() >= cm->getVersion()) { + LOG(1) << "Skipping refresh of metadata for " << nss << " " + << metadata->getCollVersion() << " with an older " << cm->getVersion(); + return metadata->getShardVersion(); + } + } } - css->setFilteringMetadata(opCtx, CollectionMetadata(cm, shardingState->shardId())); + CollectionMetadata metadata(std::move(cm), shardingState->shardId()); + const auto newShardVersion = metadata.getShardVersion(); - return css->getMetadata(opCtx)->getShardVersion(); + css->setFilteringMetadata(opCtx, std::move(metadata)); + return newShardVersion; } Status onDbVersionMismatchNoExcept( diff --git a/src/mongo/db/s/shard_server_op_observer.cpp b/src/mongo/db/s/shard_server_op_observer.cpp index 4330da209bc..4164864ea6c 100644 --- a/src/mongo/db/s/shard_server_op_observer.cpp +++ b/src/mongo/db/s/shard_server_op_observer.cpp @@ -206,8 +206,8 @@ void ShardServerOpObserver::onInserts(OperationContext* opCtx, std::vector<InsertStatement>::const_iterator begin, std::vector<InsertStatement>::const_iterator end, bool fromMigrate) { - auto const css = CollectionShardingState::get(opCtx, nss); - const auto metadata = css->getMetadata(opCtx); + auto* const css = CollectionShardingState::get(opCtx, nss); + const auto metadata = css->getMetadataForOperation(opCtx); for (auto it = begin; it != end; ++it) { const auto& insertedDoc = it->doc; @@ -236,8 +236,8 @@ void ShardServerOpObserver::onInserts(OperationContext* opCtx, } void ShardServerOpObserver::onUpdate(OperationContext* opCtx, const OplogUpdateEntryArgs& args) { - auto const css = CollectionShardingState::get(opCtx, args.nss); - const auto metadata = css->getMetadata(opCtx); + auto* const css = CollectionShardingState::get(opCtx, args.nss); + const auto metadata = css->getMetadataForOperation(opCtx); if (args.nss == NamespaceString::kShardConfigCollectionsNamespace) { // Notification of routing table changes are only needed on secondaries diff --git a/src/mongo/db/s/split_chunk.cpp b/src/mongo/db/s/split_chunk.cpp index f82f8f56bf2..d3c0a6908db 100644 --- a/src/mongo/db/s/split_chunk.cpp +++ b/src/mongo/db/s/split_chunk.cpp @@ -96,16 +96,17 @@ bool checkIfSingleDoc(OperationContext* opCtx, */ bool checkMetadataForSuccessfulSplitChunk(OperationContext* opCtx, const NamespaceString& nss, + const OID& epoch, const ChunkRange& chunkRange, const std::vector<BSONObj>& splitKeys) { const auto metadataAfterSplit = [&] { AutoGetCollection autoColl(opCtx, nss, MODE_IS); - return CollectionShardingState::get(opCtx, nss)->getMetadata(opCtx); + return CollectionShardingState::get(opCtx, nss)->getCurrentMetadata(); }(); uassert(ErrorCodes::StaleEpoch, - str::stream() << "Collection " << nss.ns() << " became unsharded", - metadataAfterSplit->isSharded()); + str::stream() << "Collection " << nss.ns() << " changed since split start", + metadataAfterSplit->getCollVersion().epoch() == epoch); auto newChunkBounds(splitKeys); auto startKey = chunkRange.getMin(); @@ -208,7 +209,8 @@ StatusWith<boost::optional<ChunkRange>> splitChunk(OperationContext* opCtx, if (!commandStatus.isOK() || !writeConcernStatus.isOK()) { forceShardFilteringMetadataRefresh(opCtx, nss); - if (checkMetadataForSuccessfulSplitChunk(opCtx, nss, chunkRange, splitKeys)) { + if (checkMetadataForSuccessfulSplitChunk( + opCtx, nss, expectedCollectionEpoch, chunkRange, splitKeys)) { // Split was committed. } else if (!commandStatus.isOK()) { return commandStatus; diff --git a/src/mongo/db/storage/wiredtiger/SConscript b/src/mongo/db/storage/wiredtiger/SConscript index 8d3d406d57d..ce34db05f7f 100644 --- a/src/mongo/db/storage/wiredtiger/SConscript +++ b/src/mongo/db/storage/wiredtiger/SConscript @@ -83,8 +83,8 @@ if wiredtiger: ], LIBDEPS_PRIVATE= [ '$BUILD_DIR/mongo/db/snapshot_window_options', - '$BUILD_DIR/mongo/util/options_parser/options_parser', '$BUILD_DIR/mongo/db/storage/storage_repair_observer', + '$BUILD_DIR/mongo/util/options_parser/options_parser', ], ) |