diff options
author | Kaloian Manassiev <kaloian.manassiev@mongodb.com> | 2017-02-27 17:05:49 -0500 |
---|---|---|
committer | Kaloian Manassiev <kaloian.manassiev@mongodb.com> | 2017-04-11 16:41:30 -0400 |
commit | 0f715bb978334314a0304b3d9aa629d297f2b313 (patch) | |
tree | 5bc0eadc390cacf0ba55404582eed87f30d2df29 /src | |
parent | ed67f8a91fe22f8667d64f57f9fad1e404ba3f05 (diff) | |
download | mongo-0f715bb978334314a0304b3d9aa629d297f2b313.tar.gz |
SERVER-22611 Sharding catalog cache refactor
(cherry picked from commit 39e06c9ef8c797ad626956b564ac9ebe295cbaf3)
(cherry picked from commit d595a0fc8150411fd6541d06b08de9bee0039baa)
Diffstat (limited to 'src')
53 files changed, 2697 insertions, 3002 deletions
diff --git a/src/mongo/db/commands/mr.cpp b/src/mongo/db/commands/mr.cpp index ada4edb1f6a..4aeed6d5978 100644 --- a/src/mongo/db/commands/mr.cpp +++ b/src/mongo/db/commands/mr.cpp @@ -70,14 +70,11 @@ #include "mongo/db/server_options.h" #include "mongo/db/service_context.h" #include "mongo/s/catalog_cache.h" -#include "mongo/s/chunk.h" -#include "mongo/s/chunk_manager.h" #include "mongo/s/client/parallel.h" #include "mongo/s/client/shard_connection.h" #include "mongo/s/client/shard_registry.h" #include "mongo/s/grid.h" #include "mongo/s/shard_key_pattern.h" -#include "mongo/s/sharding_raii.h" #include "mongo/s/stale_exception.h" #include "mongo/scripting/engine.h" #include "mongo/stdx/mutex.h" @@ -583,7 +580,7 @@ void State::appendResults(BSONObjBuilder& final) { * Does post processing on output collection. * This may involve replacing, merging or reducing. */ -long long State::postProcessCollection(OperationContext* txn, +long long State::postProcessCollection(OperationContext* opCtx, CurOp* curOp, ProgressMeterHolder& pm) { if (_onDisk == false || _config.outputOptions.outType == Config::INMEMORY) @@ -591,22 +588,22 @@ long long State::postProcessCollection(OperationContext* txn, bool holdingGlobalLock = false; if (_config.outputOptions.outNonAtomic) - return postProcessCollectionNonAtomic(txn, curOp, pm, holdingGlobalLock); + return postProcessCollectionNonAtomic(opCtx, curOp, pm, holdingGlobalLock); - invariant(!txn->lockState()->isLocked()); + invariant(!opCtx->lockState()->isLocked()); - ScopedTransaction transaction(txn, MODE_X); + ScopedTransaction transaction(opCtx, MODE_X); // This must be global because we may write across different databases. 
- Lock::GlobalWrite lock(txn->lockState()); + Lock::GlobalWrite lock(opCtx->lockState()); holdingGlobalLock = true; - return postProcessCollectionNonAtomic(txn, curOp, pm, holdingGlobalLock); + return postProcessCollectionNonAtomic(opCtx, curOp, pm, holdingGlobalLock); } namespace { // Runs a count against the namespace specified by 'ns'. If the caller holds the global write lock, // then this function does not acquire any additional locks. -unsigned long long _collectionCount(OperationContext* txn, +unsigned long long _collectionCount(OperationContext* opCtx, const string& ns, bool callerHoldsGlobalLock) { Collection* coll = nullptr; @@ -615,32 +612,32 @@ unsigned long long _collectionCount(OperationContext* txn, // If the global write lock is held, we must avoid using AutoGetCollectionForRead as it may lead // to deadlock when waiting for a majority snapshot to be committed. See SERVER-24596. if (callerHoldsGlobalLock) { - Database* db = dbHolder().get(txn, ns); + Database* db = dbHolder().get(opCtx, ns); if (db) { coll = db->getCollection(ns); } } else { - ctx.emplace(txn, NamespaceString(ns)); + ctx.emplace(opCtx, NamespaceString(ns)); coll = ctx->getCollection(); } - return coll ? coll->numRecords(txn) : 0; + return coll ? 
coll->numRecords(opCtx) : 0; } } // namespace -long long State::postProcessCollectionNonAtomic(OperationContext* txn, +long long State::postProcessCollectionNonAtomic(OperationContext* opCtx, CurOp* curOp, ProgressMeterHolder& pm, bool callerHoldsGlobalLock) { if (_config.outputOptions.finalNamespace == _config.tempNamespace) - return _collectionCount(txn, _config.outputOptions.finalNamespace, callerHoldsGlobalLock); + return _collectionCount(opCtx, _config.outputOptions.finalNamespace, callerHoldsGlobalLock); if (_config.outputOptions.outType == Config::REPLACE || - _collectionCount(txn, _config.outputOptions.finalNamespace, callerHoldsGlobalLock) == 0) { - ScopedTransaction transaction(txn, MODE_X); + _collectionCount(opCtx, _config.outputOptions.finalNamespace, callerHoldsGlobalLock) == 0) { + ScopedTransaction transaction(opCtx, MODE_X); // This must be global because we may write across different databases. - Lock::GlobalWrite lock(txn->lockState()); + Lock::GlobalWrite lock(opCtx->lockState()); // replace: just rename from temp to final collection name, dropping previous collection _db.dropCollection(_config.outputOptions.finalNamespace); BSONObj info; @@ -658,19 +655,20 @@ long long State::postProcessCollectionNonAtomic(OperationContext* txn, } else if (_config.outputOptions.outType == Config::MERGE) { // merge: upsert new docs into old collection { - const auto count = _collectionCount(txn, _config.tempNamespace, callerHoldsGlobalLock); - stdx::lock_guard<Client> lk(*txn->getClient()); + const auto count = + _collectionCount(opCtx, _config.tempNamespace, callerHoldsGlobalLock); + stdx::lock_guard<Client> lk(*opCtx->getClient()); curOp->setMessage_inlock( "m/r: merge post processing", "M/R Merge Post Processing Progress", count); } unique_ptr<DBClientCursor> cursor = _db.query(_config.tempNamespace, BSONObj()); while (cursor->more()) { - ScopedTransaction scopedXact(txn, MODE_X); - Lock::DBLock lock(txn->lockState(), + ScopedTransaction scopedXact(opCtx, 
MODE_X); + Lock::DBLock lock(opCtx->lockState(), nsToDatabaseSubstring(_config.outputOptions.finalNamespace), MODE_X); BSONObj o = cursor->nextSafe(); - Helpers::upsert(txn, _config.outputOptions.finalNamespace, o); + Helpers::upsert(opCtx, _config.outputOptions.finalNamespace, o); pm.hit(); } _db.dropCollection(_config.tempNamespace); @@ -680,25 +678,26 @@ long long State::postProcessCollectionNonAtomic(OperationContext* txn, BSONList values; { - const auto count = _collectionCount(txn, _config.tempNamespace, callerHoldsGlobalLock); - stdx::lock_guard<Client> lk(*txn->getClient()); + const auto count = + _collectionCount(opCtx, _config.tempNamespace, callerHoldsGlobalLock); + stdx::lock_guard<Client> lk(*opCtx->getClient()); curOp->setMessage_inlock( "m/r: reduce post processing", "M/R Reduce Post Processing Progress", count); } unique_ptr<DBClientCursor> cursor = _db.query(_config.tempNamespace, BSONObj()); while (cursor->more()) { - ScopedTransaction transaction(txn, MODE_X); + ScopedTransaction transaction(opCtx, MODE_X); // This must be global because we may write across different databases. 
- Lock::GlobalWrite lock(txn->lockState()); + Lock::GlobalWrite lock(opCtx->lockState()); BSONObj temp = cursor->nextSafe(); BSONObj old; bool found; { const std::string& finalNamespace = _config.outputOptions.finalNamespace; - OldClientContext tx(txn, finalNamespace); + OldClientContext tx(opCtx, finalNamespace); Collection* coll = getCollectionOrUassert(tx.db(), finalNamespace); - found = Helpers::findOne(txn, coll, temp["_id"].wrap(), old, true); + found = Helpers::findOne(opCtx, coll, temp["_id"].wrap(), old, true); } if (found) { @@ -706,18 +705,18 @@ long long State::postProcessCollectionNonAtomic(OperationContext* txn, values.clear(); values.push_back(temp); values.push_back(old); - Helpers::upsert(txn, + Helpers::upsert(opCtx, _config.outputOptions.finalNamespace, _config.reducer->finalReduce(values, _config.finalizer.get())); } else { - Helpers::upsert(txn, _config.outputOptions.finalNamespace, temp); + Helpers::upsert(opCtx, _config.outputOptions.finalNamespace, temp); } pm.hit(); } pm.finished(); } - return _collectionCount(txn, _config.outputOptions.finalNamespace, callerHoldsGlobalLock); + return _collectionCount(opCtx, _config.outputOptions.finalNamespace, callerHoldsGlobalLock); } /** @@ -791,8 +790,14 @@ void State::_insertToInc(BSONObj& o) { MONGO_WRITE_CONFLICT_RETRY_LOOP_END(_txn, "M/R insertToInc", _config.incLong); } -State::State(OperationContext* txn, const Config& c) - : _config(c), _db(txn), _useIncremental(true), _txn(txn), _size(0), _dupCount(0), _numEmits(0) { +State::State(OperationContext* opCtx, const Config& c) + : _config(c), + _db(opCtx), + _useIncremental(true), + _txn(opCtx), + _size(0), + _dupCount(0), + _numEmits(0) { _temp.reset(new InMemory()); _onDisk = _config.outputOptions.outType != Config::INMEMORY; } @@ -1008,7 +1013,7 @@ BSONObj _nativeToTemp(const BSONObj& args, void* data) { * After calling this method, the temp collection will be completed. 
* If inline, the results will be in the in memory map */ -void State::finalReduce(OperationContext* txn, CurOp* curOp, ProgressMeterHolder& pm) { +void State::finalReduce(OperationContext* opCtx, CurOp* curOp, ProgressMeterHolder& pm) { if (_jsMode) { // apply the reduce within JS if (_onDisk) { @@ -1088,7 +1093,7 @@ void State::finalReduce(OperationContext* txn, CurOp* curOp, ProgressMeterHolder auto qr = stdx::make_unique<QueryRequest>(nss); qr->setSort(sortKey); - auto statusWithCQ = CanonicalQuery::canonicalize(txn, std::move(qr), extensionsCallback); + auto statusWithCQ = CanonicalQuery::canonicalize(opCtx, std::move(qr), extensionsCallback); verify(statusWithCQ.isOK()); std::unique_ptr<CanonicalQuery> cq = std::move(statusWithCQ.getValue()); @@ -1344,7 +1349,7 @@ public: addPrivilegesRequiredForMapReduce(this, dbname, cmdObj, out); } - bool run(OperationContext* txn, + bool run(OperationContext* opCtx, const string& dbname, BSONObj& cmd, int, @@ -1354,9 +1359,9 @@ public: boost::optional<DisableDocumentValidation> maybeDisableValidation; if (shouldBypassDocumentValidationForCommand(cmd)) - maybeDisableValidation.emplace(txn); + maybeDisableValidation.emplace(opCtx); - auto client = txn->getClient(); + auto client = opCtx->getClient(); if (client->isInDirectClient()) { return appendCommandStatus( @@ -1364,7 +1369,7 @@ public: Status(ErrorCodes::IllegalOperation, "Cannot run mapReduce command from eval()")); } - auto curOp = CurOp::get(txn); + auto curOp = CurOp::get(opCtx); Config config(dbname, cmd); @@ -1386,7 +1391,7 @@ public: unique_ptr<RangePreserver> rangePreserver; ScopedCollectionMetadata collMetadata; { - AutoGetCollectionForRead ctx(txn, config.ns); + AutoGetCollectionForRead ctx(opCtx, config.ns); Collection* collection = ctx.getCollection(); if (collection) { @@ -1395,20 +1400,20 @@ public: // Get metadata before we check our version, to make sure it doesn't increment // in the meantime. Need to do this in the same lock scope as the block. 
- if (ShardingState::get(txn)->needCollectionMetadata(txn, config.ns)) { - collMetadata = CollectionShardingState::get(txn, config.ns)->getMetadata(); + if (ShardingState::get(opCtx)->needCollectionMetadata(opCtx, config.ns)) { + collMetadata = CollectionShardingState::get(opCtx, config.ns)->getMetadata(); } } // Ensure that the RangePreserver is freed under the lock. This is necessary since the // RangePreserver's destructor unpins a ClientCursor, and access to the CursorManager must // be done under the lock. - ON_BLOCK_EXIT([txn, &config, &rangePreserver] { + ON_BLOCK_EXIT([opCtx, &config, &rangePreserver] { if (rangePreserver) { // Be sure not to use AutoGetCollectionForRead here, since that has side-effects // other than lock acquisition. - ScopedTransaction scopedTxn(txn, MODE_IS); - AutoGetCollection ctx(txn, NamespaceString(config.ns), MODE_IS); + ScopedTransaction scopedopCtx(opCtx, MODE_IS); + AutoGetCollection ctx(opCtx, NamespaceString(config.ns), MODE_IS); rangePreserver.reset(); } }); @@ -1417,7 +1422,7 @@ public: BSONObjBuilder countsBuilder; BSONObjBuilder timingBuilder; - State state(txn, config); + State state(opCtx, config); if (!state.sourceExists()) { errmsg = "ns doesn't exist"; return false; @@ -1441,7 +1446,7 @@ public: bool showTotal = true; if (state.config().filter.isEmpty()) { const bool holdingGlobalLock = false; - const auto count = _collectionCount(txn, config.ns, holdingGlobalLock); + const auto count = _collectionCount(opCtx, config.ns, holdingGlobalLock); progressTotal = (config.limit && (unsigned)config.limit < count) ? 
config.limit : count; } else { @@ -1450,7 +1455,7 @@ public: progressTotal = 1; } - stdx::unique_lock<Client> lk(*txn->getClient()); + stdx::unique_lock<Client> lk(*opCtx->getClient()); ProgressMeter& progress(curOp->setMessage_inlock( "m/r: (1/3) emit phase", "M/R: (1/3) Emit Progress", progressTotal)); lk.unlock(); @@ -1471,18 +1476,18 @@ public: const NamespaceString nss(config.ns); // Need lock and context to use it - unique_ptr<ScopedTransaction> scopedXact(new ScopedTransaction(txn, MODE_IS)); - unique_ptr<AutoGetDb> scopedAutoDb(new AutoGetDb(txn, nss.db(), MODE_S)); + unique_ptr<ScopedTransaction> scopedXact(new ScopedTransaction(opCtx, MODE_IS)); + unique_ptr<AutoGetDb> scopedAutoDb(new AutoGetDb(opCtx, nss.db(), MODE_S)); auto qr = stdx::make_unique<QueryRequest>(nss); qr->setFilter(config.filter); qr->setSort(config.sort); qr->setCollation(config.collation); - const ExtensionsCallbackReal extensionsCallback(txn, &nss); + const ExtensionsCallbackReal extensionsCallback(opCtx, &nss); auto statusWithCQ = - CanonicalQuery::canonicalize(txn, std::move(qr), extensionsCallback); + CanonicalQuery::canonicalize(opCtx, std::move(qr), extensionsCallback); if (!statusWithCQ.isOK()) { uasserted(17238, "Can't canonicalize query " + config.filter.toString()); return 0; @@ -1496,7 +1501,7 @@ public: invariant(coll); auto statusWithPlanExecutor = - getExecutor(txn, coll, std::move(cq), PlanExecutor::YIELD_AUTO); + getExecutor(opCtx, coll, std::move(cq), PlanExecutor::YIELD_AUTO); if (!statusWithPlanExecutor.isOK()) { uasserted(17239, "Can't get executor for query " + config.filter.toString()); @@ -1507,8 +1512,8 @@ public: } { - stdx::lock_guard<Client> lk(*txn->getClient()); - CurOp::get(txn)->setPlanSummary_inlock(Explain::getPlanSummary(exec.get())); + stdx::lock_guard<Client> lk(*opCtx->getClient()); + CurOp::get(opCtx)->setPlanSummary_inlock(Explain::getPlanSummary(exec.get())); } Timer mt; @@ -1551,8 +1556,8 @@ public: state.reduceAndSpillInMemoryStateIfNeeded(); - 
scopedXact.reset(new ScopedTransaction(txn, MODE_IS)); - scopedAutoDb.reset(new AutoGetDb(txn, nss.db(), MODE_S)); + scopedXact.reset(new ScopedTransaction(opCtx, MODE_IS)); + scopedAutoDb.reset(new AutoGetDb(opCtx, nss.db(), MODE_S)); if (!exec->restoreState()) { return appendCommandStatus( @@ -1564,7 +1569,7 @@ public: reduceTime += t.micros(); - txn->checkForInterrupt(); + opCtx->checkForInterrupt(); } pm.hit(); @@ -1591,7 +1596,7 @@ public: Collection* coll = scopedAutoDb->getDb()->getCollection(config.ns); invariant(coll); // 'exec' hasn't been killed, so collection must be alive. - coll->infoCache()->notifyOfQuery(txn, stats.indexesUsed); + coll->infoCache()->notifyOfQuery(opCtx, stats.indexesUsed); if (curOp->shouldDBProfile()) { BSONObjBuilder execStatsBob; @@ -1601,7 +1606,7 @@ public: } pm.finished(); - txn->checkForInterrupt(); + opCtx->checkForInterrupt(); // update counters countsBuilder.appendNumber("input", numInputs); @@ -1613,7 +1618,7 @@ public: timingBuilder.append("emitLoop", t.millis()); { - stdx::lock_guard<Client> lk(*txn->getClient()); + stdx::lock_guard<Client> lk(*opCtx->getClient()); curOp->setMessage_inlock("m/r: (2/3) final reduce in memory", "M/R: (2/3) Final In-Memory Reduce Progress"); } @@ -1624,13 +1629,13 @@ public: // if not inline: dump the in memory map to inc collection, all data is on disk state.dumpToInc(); // final reduce - state.finalReduce(txn, curOp, pm); + state.finalReduce(opCtx, curOp, pm); reduceTime += rt.micros(); // Ensure the profile shows the source namespace. If the output was not inline, the // active namespace will be the temporary collection we inserted into. { - stdx::lock_guard<Client> lk(*txn->getClient()); + stdx::lock_guard<Client> lk(*opCtx->getClient()); curOp->setNS_inlock(config.ns); } @@ -1638,7 +1643,7 @@ public: timingBuilder.appendNumber("reduceTime", reduceTime / 1000); timingBuilder.append("mode", state.jsMode() ? 
"js" : "mixed"); - long long finalCount = state.postProcessCollection(txn, curOp, pm); + long long finalCount = state.postProcessCollection(opCtx, curOp, pm); state.appendResults(result); timingBuilder.appendNumber("total", t.millis()); @@ -1701,7 +1706,7 @@ public: actions.addAction(ActionType::internal); out->push_back(Privilege(ResourcePattern::forClusterResource(), actions)); } - bool run(OperationContext* txn, + bool run(OperationContext* opCtx, const string& dbname, BSONObj& cmdObj, int, @@ -1717,7 +1722,7 @@ public: boost::optional<DisableDocumentValidation> maybeDisableValidation; if (shouldBypassDocumentValidationForCommand(cmdObj)) - maybeDisableValidation.emplace(txn); + maybeDisableValidation.emplace(opCtx); ShardedConnectionInfo::addHook(); @@ -1731,10 +1736,10 @@ public: inputNS = dbname + "." + shardedOutputCollection; } - CurOp* curOp = CurOp::get(txn); + CurOp* curOp = CurOp::get(opCtx); Config config(dbname, cmdObj.firstElement().embeddedObjectUserCheck()); - State state(txn, config); + State state(opCtx, config); state.init(); // no need for incremental collection because records are already sorted @@ -1744,7 +1749,7 @@ public: BSONObj shardCounts = cmdObj["shardCounts"].embeddedObjectUserCheck(); BSONObj counts = cmdObj["counts"].embeddedObjectUserCheck(); - stdx::unique_lock<Client> lk(*txn->getClient()); + stdx::unique_lock<Client> lk(*opCtx->getClient()); ProgressMeterHolder pm(curOp->setMessage_inlock("m/r: merge sort and reduce", "M/R Merge Sort and Reduce Progress")); lk.unlock(); @@ -1758,14 +1763,13 @@ public: std::string server = e.fieldName(); servers.insert(server); - uassertStatusOK(Grid::get(txn)->shardRegistry()->getShard(txn, server)); + uassertStatusOK(Grid::get(opCtx)->shardRegistry()->getShard(opCtx, server)); } } state.prepTempCollection(); ON_BLOCK_EXIT_OBJ(state, &State::dropTempCollections); - BSONList values; if (!config.outputOptions.outDB.empty()) { BSONObjBuilder loc; if (!config.outputOptions.outDB.empty()) @@ -1773,32 
+1777,29 @@ public: if (!config.outputOptions.collectionName.empty()) loc.append("collection", config.outputOptions.collectionName); result.append("result", loc.obj()); - } else { - if (!config.outputOptions.collectionName.empty()) - result.append("result", config.outputOptions.collectionName); - } - - auto scopedDbStatus = ScopedShardDatabase::getExisting(txn, dbname); - if (!scopedDbStatus.isOK()) { - return appendCommandStatus(result, scopedDbStatus.getStatus()); + } else if (!config.outputOptions.collectionName.empty()) { + result.append("result", config.outputOptions.collectionName); } - auto confOut = scopedDbStatus.getValue().db(); + std::vector<std::shared_ptr<Chunk>> chunks; - vector<shared_ptr<Chunk>> chunks; - if (confOut->isSharded(config.outputOptions.finalNamespace)) { - shared_ptr<ChunkManager> cm = - confOut->getChunkManager(txn, config.outputOptions.finalNamespace); + if (config.outputOptions.outType != Config::OutputType::INMEMORY) { + auto outRoutingInfoStatus = Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfo( + opCtx, config.outputOptions.finalNamespace); + if (!outRoutingInfoStatus.isOK()) { + return appendCommandStatus(result, outRoutingInfoStatus.getStatus()); + } - // Fetch result from other shards 1 chunk at a time. It would be better to do just one - // big $or query, but then the sorting would not be efficient. - const string shardName = ShardingState::get(txn)->getShardName(); - const ChunkMap& chunkMap = cm->getChunkMap(); + if (auto cm = outRoutingInfoStatus.getValue().cm()) { + // Fetch result from other shards 1 chunk at a time. It would be better to do just + // one big $or query, but then the sorting would not be efficient. 
+ const string shardName = ShardingState::get(opCtx)->getShardName(); - for (ChunkMap::const_iterator it = chunkMap.begin(); it != chunkMap.end(); ++it) { - shared_ptr<Chunk> chunk = it->second; - if (chunk->getShardId() == shardName) { - chunks.push_back(chunk); + for (const auto& chunkEntry : cm->chunkMap()) { + const auto& chunk = chunkEntry.second; + if (chunk->getShardId() == shardName) { + chunks.push_back(chunk); + } } } } @@ -1807,6 +1808,8 @@ public: unsigned int index = 0; BSONObj query; BSONArrayBuilder chunkSizes; + BSONList values; + while (true) { shared_ptr<Chunk> chunk; if (chunks.size() > 0) { @@ -1822,7 +1825,9 @@ public: BSONObj sortKey = BSON("_id" << 1); ParallelSortClusteredCursor cursor( servers, inputNS, Query(query).sort(sortKey), QueryOption_NoCursorTimeout); - cursor.init(txn); + + cursor.init(opCtx); + int chunkSize = 0; while (cursor.more() || !values.empty()) { @@ -1848,7 +1853,9 @@ public: state.insert(config.tempNamespace, res); else state.emit(res); + values.clear(); + if (!t.isEmpty()) values.push_back(t); } @@ -1857,6 +1864,7 @@ public: chunkSizes.append(chunk->getMin()); chunkSizes.append(chunkSize); } + if (++index >= chunks.size()) break; } @@ -1866,7 +1874,7 @@ public: result.append("chunkSizes", chunkSizes.arr()); - long long outputCount = state.postProcessCollection(txn, curOp, pm); + long long outputCount = state.postProcessCollection(opCtx, curOp, pm); state.appendResults(result); BSONObjBuilder countsB(32); @@ -1875,8 +1883,10 @@ public: countsB.append("output", outputCount); result.append("counts", countsB.obj()); - return 1; + return true; } + } mapReduceFinishCommand; -} -} + +} // namespace +} // namespace mongo diff --git a/src/mongo/db/s/balancer/balancer.cpp b/src/mongo/db/s/balancer/balancer.cpp index 96a7f24fce9..6096c0e42ba 100644 --- a/src/mongo/db/s/balancer/balancer.cpp +++ b/src/mongo/db/s/balancer/balancer.cpp @@ -46,12 +46,11 @@ #include "mongo/s/balancer_configuration.h" #include 
"mongo/s/catalog/sharding_catalog_client.h" #include "mongo/s/catalog/type_chunk.h" -#include "mongo/s/client/shard.h" +#include "mongo/s/catalog_cache.h" #include "mongo/s/client/shard_registry.h" #include "mongo/s/cluster_identity_loader.h" #include "mongo/s/grid.h" #include "mongo/s/shard_util.h" -#include "mongo/s/sharding_raii.h" #include "mongo/stdx/memory.h" #include "mongo/util/exit.h" #include "mongo/util/log.h" @@ -186,12 +185,12 @@ Balancer* Balancer::get(OperationContext* operationContext) { return get(operationContext->getServiceContext()); } -void Balancer::initiateBalancer(OperationContext* txn) { +void Balancer::initiateBalancer(OperationContext* opCtx) { stdx::lock_guard<stdx::mutex> scopedLock(_mutex); invariant(_state == kStopped); _state = kRunning; - _migrationManager.startRecoveryAndAcquireDistLocks(txn); + _migrationManager.startRecoveryAndAcquireDistLocks(opCtx); invariant(!_thread.joinable()); invariant(!_threadOperationContext); @@ -240,15 +239,15 @@ void Balancer::waitForBalancerToStop() { LOG(1) << "Balancer thread terminated"; } -void Balancer::joinCurrentRound(OperationContext* txn) { +void Balancer::joinCurrentRound(OperationContext* opCtx) { stdx::unique_lock<stdx::mutex> scopedLock(_mutex); const auto numRoundsAtStart = _numBalancerRounds; _condVar.wait(scopedLock, [&] { return !_inBalancerRound || _numBalancerRounds != numRoundsAtStart; }); } -Status Balancer::rebalanceSingleChunk(OperationContext* txn, const ChunkType& chunk) { - auto migrateStatus = _chunkSelectionPolicy->selectSpecificChunkToMove(txn, chunk); +Status Balancer::rebalanceSingleChunk(OperationContext* opCtx, const ChunkType& chunk) { + auto migrateStatus = _chunkSelectionPolicy->selectSpecificChunkToMove(opCtx, chunk); if (!migrateStatus.isOK()) { return migrateStatus.getStatus(); } @@ -259,37 +258,37 @@ Status Balancer::rebalanceSingleChunk(OperationContext* txn, const ChunkType& ch return Status::OK(); } - auto balancerConfig = 
Grid::get(txn)->getBalancerConfiguration(); - Status refreshStatus = balancerConfig->refreshAndCheck(txn); + auto balancerConfig = Grid::get(opCtx)->getBalancerConfiguration(); + Status refreshStatus = balancerConfig->refreshAndCheck(opCtx); if (!refreshStatus.isOK()) { return refreshStatus; } - return _migrationManager.executeManualMigration(txn, + return _migrationManager.executeManualMigration(opCtx, *migrateInfo, balancerConfig->getMaxChunkSizeBytes(), balancerConfig->getSecondaryThrottle(), balancerConfig->waitForDelete()); } -Status Balancer::moveSingleChunk(OperationContext* txn, +Status Balancer::moveSingleChunk(OperationContext* opCtx, const ChunkType& chunk, const ShardId& newShardId, uint64_t maxChunkSizeBytes, const MigrationSecondaryThrottleOptions& secondaryThrottle, bool waitForDelete) { - auto moveAllowedStatus = _chunkSelectionPolicy->checkMoveAllowed(txn, chunk, newShardId); + auto moveAllowedStatus = _chunkSelectionPolicy->checkMoveAllowed(opCtx, chunk, newShardId); if (!moveAllowedStatus.isOK()) { return moveAllowedStatus; } return _migrationManager.executeManualMigration( - txn, MigrateInfo(newShardId, chunk), maxChunkSizeBytes, secondaryThrottle, waitForDelete); + opCtx, MigrateInfo(newShardId, chunk), maxChunkSizeBytes, secondaryThrottle, waitForDelete); } -void Balancer::report(OperationContext* txn, BSONObjBuilder* builder) { - auto balancerConfig = Grid::get(txn)->getBalancerConfiguration(); - balancerConfig->refreshAndCheck(txn); +void Balancer::report(OperationContext* opCtx, BSONObjBuilder* builder) { + auto balancerConfig = Grid::get(opCtx)->getBalancerConfiguration(); + balancerConfig->refreshAndCheck(opCtx); const auto mode = balancerConfig->getBalancerMode(); @@ -301,14 +300,14 @@ void Balancer::report(OperationContext* txn, BSONObjBuilder* builder) { void Balancer::_mainThread() { Client::initThread("Balancer"); - auto txn = cc().makeOperationContext(); - auto shardingContext = Grid::get(txn.get()); + auto opCtx = 
cc().makeOperationContext(); + auto shardingContext = Grid::get(opCtx.get()); log() << "CSRS balancer is starting"; { stdx::lock_guard<stdx::mutex> scopedLock(_mutex); - _threadOperationContext = txn.get(); + _threadOperationContext = opCtx.get(); } const Seconds kInitBackoffInterval(10); @@ -316,13 +315,13 @@ void Balancer::_mainThread() { // Take the balancer distributed lock and hold it permanently. Do the attempts with single // attempts in order to not block the thread and be able to check for interrupt more frequently. while (!_stopRequested()) { - auto status = _migrationManager.tryTakeBalancerLock(txn.get(), "CSRS Balancer"); + auto status = _migrationManager.tryTakeBalancerLock(opCtx.get(), "CSRS Balancer"); if (!status.isOK()) { log() << "Balancer distributed lock could not be acquired and will be retried in " << durationCount<Seconds>(kInitBackoffInterval) << " seconds" << causedBy(redact(status)); - _sleepFor(txn.get(), kInitBackoffInterval); + _sleepFor(opCtx.get(), kInitBackoffInterval); continue; } @@ -331,13 +330,13 @@ void Balancer::_mainThread() { auto balancerConfig = shardingContext->getBalancerConfiguration(); while (!_stopRequested()) { - Status refreshStatus = balancerConfig->refreshAndCheck(txn.get()); + Status refreshStatus = balancerConfig->refreshAndCheck(opCtx.get()); if (!refreshStatus.isOK()) { warning() << "Balancer settings could not be loaded and will be retried in " << durationCount<Seconds>(kInitBackoffInterval) << " seconds" << causedBy(refreshStatus); - _sleepFor(txn.get(), kInitBackoffInterval); + _sleepFor(opCtx.get(), kInitBackoffInterval); continue; } @@ -346,8 +345,9 @@ void Balancer::_mainThread() { log() << "CSRS balancer thread is recovering"; - _migrationManager.finishRecovery( - txn.get(), balancerConfig->getMaxChunkSizeBytes(), balancerConfig->getSecondaryThrottle()); + _migrationManager.finishRecovery(opCtx.get(), + balancerConfig->getMaxChunkSizeBytes(), + balancerConfig->getSecondaryThrottle()); log() << "CSRS 
balancer thread is recovered"; @@ -355,23 +355,23 @@ void Balancer::_mainThread() { while (!_stopRequested()) { BalanceRoundDetails roundDetails; - _beginRound(txn.get()); + _beginRound(opCtx.get()); try { - shardingContext->shardRegistry()->reload(txn.get()); + shardingContext->shardRegistry()->reload(opCtx.get()); - uassert(13258, "oids broken after resetting!", _checkOIDs(txn.get())); + uassert(13258, "oids broken after resetting!", _checkOIDs(opCtx.get())); - Status refreshStatus = balancerConfig->refreshAndCheck(txn.get()); + Status refreshStatus = balancerConfig->refreshAndCheck(opCtx.get()); if (!refreshStatus.isOK()) { warning() << "Skipping balancing round" << causedBy(refreshStatus); - _endRound(txn.get(), kBalanceRoundDefaultInterval); + _endRound(opCtx.get(), kBalanceRoundDefaultInterval); continue; } if (!balancerConfig->shouldBalance()) { LOG(1) << "Skipping balancing round because balancing is disabled"; - _endRound(txn.get(), kBalanceRoundDefaultInterval); + _endRound(opCtx.get(), kBalanceRoundDefaultInterval); continue; } @@ -382,9 +382,9 @@ void Balancer::_mainThread() { << balancerConfig->getSecondaryThrottle().toBSON(); OCCASIONALLY warnOnMultiVersion( - uassertStatusOK(_clusterStats->getStats(txn.get()))); + uassertStatusOK(_clusterStats->getStats(opCtx.get()))); - Status status = _enforceTagRanges(txn.get()); + Status status = _enforceTagRanges(opCtx.get()); if (!status.isOK()) { warning() << "Failed to enforce tag ranges" << causedBy(status); } else { @@ -392,25 +392,25 @@ void Balancer::_mainThread() { } const auto candidateChunks = uassertStatusOK( - _chunkSelectionPolicy->selectChunksToMove(txn.get(), _balancedLastTime)); + _chunkSelectionPolicy->selectChunksToMove(opCtx.get(), _balancedLastTime)); if (candidateChunks.empty()) { LOG(1) << "no need to move any chunk"; _balancedLastTime = false; } else { - _balancedLastTime = _moveChunks(txn.get(), candidateChunks); + _balancedLastTime = _moveChunks(opCtx.get(), candidateChunks); 
roundDetails.setSucceeded(static_cast<int>(candidateChunks.size()), _balancedLastTime); - shardingContext->catalogClient(txn.get())->logAction( - txn.get(), "balancer.round", "", roundDetails.toBSON()); + shardingContext->catalogClient(opCtx.get()) + ->logAction(opCtx.get(), "balancer.round", "", roundDetails.toBSON()); } LOG(1) << "*** End of balancing round"; } - _endRound(txn.get(), + _endRound(opCtx.get(), _balancedLastTime ? kShortBalanceRoundInterval : kBalanceRoundDefaultInterval); } catch (const std::exception& e) { @@ -422,11 +422,11 @@ void Balancer::_mainThread() { // This round failed, tell the world! roundDetails.setFailed(e.what()); - shardingContext->catalogClient(txn.get())->logAction( - txn.get(), "balancer.round", "", roundDetails.toBSON()); + shardingContext->catalogClient(opCtx.get()) + ->logAction(opCtx.get(), "balancer.round", "", roundDetails.toBSON()); // Sleep a fair amount before retrying because of the error - _endRound(txn.get(), kBalanceRoundDefaultInterval); + _endRound(opCtx.get(), kBalanceRoundDefaultInterval); } } @@ -453,13 +453,13 @@ bool Balancer::_stopRequested() { return (_state != kRunning); } -void Balancer::_beginRound(OperationContext* txn) { +void Balancer::_beginRound(OperationContext* opCtx) { stdx::unique_lock<stdx::mutex> lock(_mutex); _inBalancerRound = true; _condVar.notify_all(); } -void Balancer::_endRound(OperationContext* txn, Seconds waitTimeout) { +void Balancer::_endRound(OperationContext* opCtx, Seconds waitTimeout) { { stdx::lock_guard<stdx::mutex> lock(_mutex); _inBalancerRound = false; @@ -467,16 +467,16 @@ void Balancer::_endRound(OperationContext* txn, Seconds waitTimeout) { _condVar.notify_all(); } - _sleepFor(txn, waitTimeout); + _sleepFor(opCtx, waitTimeout); } -void Balancer::_sleepFor(OperationContext* txn, Seconds waitTimeout) { +void Balancer::_sleepFor(OperationContext* opCtx, Seconds waitTimeout) { stdx::unique_lock<stdx::mutex> lock(_mutex); _condVar.wait_for(lock, 
waitTimeout.toSystemDuration(), [&] { return _state != kRunning; }); } -bool Balancer::_checkOIDs(OperationContext* txn) { - auto shardingContext = Grid::get(txn); +bool Balancer::_checkOIDs(OperationContext* opCtx) { + auto shardingContext = Grid::get(opCtx); vector<ShardId> all; shardingContext->shardRegistry()->getAllShardIds(&all); @@ -489,14 +489,14 @@ bool Balancer::_checkOIDs(OperationContext* txn) { return false; } - auto shardStatus = shardingContext->shardRegistry()->getShard(txn, shardId); + auto shardStatus = shardingContext->shardRegistry()->getShard(opCtx, shardId); if (!shardStatus.isOK()) { continue; } const auto s = shardStatus.getValue(); auto result = uassertStatusOK( - s->runCommandWithFixedRetryAttempts(txn, + s->runCommandWithFixedRetryAttempts(opCtx, ReadPreferenceSetting{ReadPreference::PrimaryOnly}, "admin", BSON("features" << 1), @@ -513,18 +513,18 @@ bool Balancer::_checkOIDs(OperationContext* txn) { << " and " << oids[x]; result = uassertStatusOK(s->runCommandWithFixedRetryAttempts( - txn, + opCtx, ReadPreferenceSetting{ReadPreference::PrimaryOnly}, "admin", BSON("features" << 1 << "oidReset" << 1), Shard::RetryPolicy::kIdempotent)); uassertStatusOK(result.commandStatus); - auto otherShardStatus = shardingContext->shardRegistry()->getShard(txn, oids[x]); + auto otherShardStatus = shardingContext->shardRegistry()->getShard(opCtx, oids[x]); if (otherShardStatus.isOK()) { result = uassertStatusOK( otherShardStatus.getValue()->runCommandWithFixedRetryAttempts( - txn, + opCtx, ReadPreferenceSetting{ReadPreference::PrimaryOnly}, "admin", BSON("features" << 1 << "oidReset" << 1), @@ -542,25 +542,27 @@ bool Balancer::_checkOIDs(OperationContext* txn) { return true; } -Status Balancer::_enforceTagRanges(OperationContext* txn) { - auto chunksToSplitStatus = _chunkSelectionPolicy->selectChunksToSplit(txn); +Status Balancer::_enforceTagRanges(OperationContext* opCtx) { + auto chunksToSplitStatus = _chunkSelectionPolicy->selectChunksToSplit(opCtx); 
if (!chunksToSplitStatus.isOK()) { return chunksToSplitStatus.getStatus(); } for (const auto& splitInfo : chunksToSplitStatus.getValue()) { - auto scopedCMStatus = ScopedChunkManager::refreshAndGet(txn, splitInfo.nss); - if (!scopedCMStatus.isOK()) { - return scopedCMStatus.getStatus(); + auto routingInfoStatus = + Grid::get(opCtx)->catalogCache()->getShardedCollectionRoutingInfoWithRefresh( + opCtx, splitInfo.nss); + if (!routingInfoStatus.isOK()) { + return routingInfoStatus.getStatus(); } - const auto& scopedCM = scopedCMStatus.getValue(); + auto cm = routingInfoStatus.getValue().cm(); auto splitStatus = - shardutil::splitChunkAtMultiplePoints(txn, + shardutil::splitChunkAtMultiplePoints(opCtx, splitInfo.shardId, splitInfo.nss, - scopedCM.cm()->getShardKeyPattern(), + cm->getShardKeyPattern(), splitInfo.collectionVersion, ChunkRange(splitInfo.minKey, splitInfo.maxKey), splitInfo.splitKeys); @@ -573,9 +575,9 @@ Status Balancer::_enforceTagRanges(OperationContext* txn) { return Status::OK(); } -int Balancer::_moveChunks(OperationContext* txn, +int Balancer::_moveChunks(OperationContext* opCtx, const BalancerChunkSelectionPolicy::MigrateInfoVector& candidateChunks) { - auto balancerConfig = Grid::get(txn)->getBalancerConfiguration(); + auto balancerConfig = Grid::get(opCtx)->getBalancerConfiguration(); // If the balancer was disabled since we started this round, don't start new chunk moves if (_stopRequested() || !balancerConfig->shouldBalance()) { @@ -584,7 +586,7 @@ int Balancer::_moveChunks(OperationContext* txn, } auto migrationStatuses = - _migrationManager.executeMigrationsForAutoBalance(txn, + _migrationManager.executeMigrationsForAutoBalance(opCtx, candidateChunks, balancerConfig->getMaxChunkSizeBytes(), balancerConfig->getSecondaryThrottle(), @@ -614,7 +616,7 @@ int Balancer::_moveChunks(OperationContext* txn, log() << "Performing a split because migration " << redact(requestIt->toString()) << " failed for size reasons" << causedBy(redact(status)); - 
_splitOrMarkJumbo(txn, NamespaceString(requestIt->ns), requestIt->minKey); + _splitOrMarkJumbo(opCtx, NamespaceString(requestIt->ns), requestIt->minKey); continue; } @@ -625,28 +627,29 @@ int Balancer::_moveChunks(OperationContext* txn, return numChunksProcessed; } -void Balancer::_splitOrMarkJumbo(OperationContext* txn, +void Balancer::_splitOrMarkJumbo(OperationContext* opCtx, const NamespaceString& nss, const BSONObj& minKey) { - auto scopedCM = uassertStatusOK(ScopedChunkManager::refreshAndGet(txn, nss)); - const auto cm = scopedCM.cm().get(); + auto routingInfo = uassertStatusOK( + Grid::get(opCtx)->catalogCache()->getShardedCollectionRoutingInfoWithRefresh(opCtx, nss)); + const auto cm = routingInfo.cm().get(); auto chunk = cm->findIntersectingChunkWithSimpleCollation(minKey); try { const auto splitPoints = uassertStatusOK(shardutil::selectChunkSplitPoints( - txn, + opCtx, chunk->getShardId(), nss, cm->getShardKeyPattern(), ChunkRange(chunk->getMin(), chunk->getMax()), - Grid::get(txn)->getBalancerConfiguration()->getMaxChunkSizeBytes(), + Grid::get(opCtx)->getBalancerConfiguration()->getMaxChunkSizeBytes(), boost::none)); uassert(ErrorCodes::CannotSplit, "No split points found", !splitPoints.empty()); uassertStatusOK( - shardutil::splitChunkAtMultiplePoints(txn, + shardutil::splitChunkAtMultiplePoints(opCtx, chunk->getShardId(), nss, cm->getShardKeyPattern(), @@ -660,8 +663,8 @@ void Balancer::_splitOrMarkJumbo(OperationContext* txn, const std::string chunkName = ChunkType::genID(nss.ns(), chunk->getMin()); - auto status = Grid::get(txn)->catalogClient(txn)->updateConfigDocument( - txn, + auto status = Grid::get(opCtx)->catalogClient(opCtx)->updateConfigDocument( + opCtx, ChunkType::ConfigNS, BSON(ChunkType::name(chunkName)), BSON("$set" << BSON(ChunkType::jumbo(true))), diff --git a/src/mongo/db/s/balancer/balancer_chunk_selection_policy_impl.cpp b/src/mongo/db/s/balancer/balancer_chunk_selection_policy_impl.cpp index 4f3905b61bd..5d96be7004b 100644 --- 
a/src/mongo/db/s/balancer/balancer_chunk_selection_policy_impl.cpp +++ b/src/mongo/db/s/balancer/balancer_chunk_selection_policy_impl.cpp @@ -43,7 +43,6 @@ #include "mongo/s/catalog/type_tags.h" #include "mongo/s/catalog_cache.h" #include "mongo/s/grid.h" -#include "mongo/s/sharding_raii.h" #include "mongo/stdx/memory.h" #include "mongo/util/log.h" #include "mongo/util/mongoutils/str.h" @@ -63,7 +62,7 @@ namespace { * distrubution and chunk placement information which is needed by the balancer policy. */ StatusWith<DistributionStatus> createCollectionDistributionStatus( - OperationContext* txn, const ShardStatisticsVector& allShards, ChunkManager* chunkMgr) { + OperationContext* opCtx, const ShardStatisticsVector& allShards, ChunkManager* chunkMgr) { ShardToChunksMap shardToChunksMap; // Makes sure there is an entry in shardToChunksMap for every shard, so empty shards will also @@ -72,7 +71,7 @@ StatusWith<DistributionStatus> createCollectionDistributionStatus( shardToChunksMap[stat.shardId]; } - for (const auto& entry : chunkMgr->getChunkMap()) { + for (const auto& entry : chunkMgr->chunkMap()) { const auto& chunkEntry = entry.second; ChunkType chunk; @@ -87,8 +86,8 @@ StatusWith<DistributionStatus> createCollectionDistributionStatus( } vector<TagsType> collectionTags; - Status tagsStatus = Grid::get(txn)->catalogClient(txn)->getTagsForCollection( - txn, chunkMgr->getns(), &collectionTags); + Status tagsStatus = Grid::get(opCtx)->catalogClient(opCtx)->getTagsForCollection( + opCtx, chunkMgr->getns(), &collectionTags); if (!tagsStatus.isOK()) { return {tagsStatus.code(), str::stream() << "Unable to load tags for collection " << chunkMgr->getns() @@ -186,8 +185,8 @@ BalancerChunkSelectionPolicyImpl::BalancerChunkSelectionPolicyImpl(ClusterStatis BalancerChunkSelectionPolicyImpl::~BalancerChunkSelectionPolicyImpl() = default; StatusWith<SplitInfoVector> BalancerChunkSelectionPolicyImpl::selectChunksToSplit( - OperationContext* txn) { - auto shardStatsStatus = 
_clusterStats->getStats(txn); + OperationContext* opCtx) { + auto shardStatsStatus = _clusterStats->getStats(opCtx); if (!shardStatsStatus.isOK()) { return shardStatsStatus.getStatus(); } @@ -196,8 +195,8 @@ StatusWith<SplitInfoVector> BalancerChunkSelectionPolicyImpl::selectChunksToSpli vector<CollectionType> collections; - Status collsStatus = - Grid::get(txn)->catalogClient(txn)->getCollections(txn, nullptr, &collections, nullptr); + Status collsStatus = Grid::get(opCtx)->catalogClient(opCtx)->getCollections( + opCtx, nullptr, &collections, nullptr); if (!collsStatus.isOK()) { return collsStatus; } @@ -215,7 +214,7 @@ StatusWith<SplitInfoVector> BalancerChunkSelectionPolicyImpl::selectChunksToSpli const NamespaceString nss(coll.getNs()); - auto candidatesStatus = _getSplitCandidatesForCollection(txn, nss, shardStats); + auto candidatesStatus = _getSplitCandidatesForCollection(opCtx, nss, shardStats); if (candidatesStatus == ErrorCodes::NamespaceNotFound) { // Namespace got dropped before we managed to get to it, so just skip it continue; @@ -234,8 +233,8 @@ StatusWith<SplitInfoVector> BalancerChunkSelectionPolicyImpl::selectChunksToSpli } StatusWith<MigrateInfoVector> BalancerChunkSelectionPolicyImpl::selectChunksToMove( - OperationContext* txn, bool aggressiveBalanceHint) { - auto shardStatsStatus = _clusterStats->getStats(txn); + OperationContext* opCtx, bool aggressiveBalanceHint) { + auto shardStatsStatus = _clusterStats->getStats(opCtx); if (!shardStatsStatus.isOK()) { return shardStatsStatus.getStatus(); } @@ -248,8 +247,8 @@ StatusWith<MigrateInfoVector> BalancerChunkSelectionPolicyImpl::selectChunksToMo vector<CollectionType> collections; - Status collsStatus = - Grid::get(txn)->catalogClient(txn)->getCollections(txn, nullptr, &collections, nullptr); + Status collsStatus = Grid::get(opCtx)->catalogClient(opCtx)->getCollections( + opCtx, nullptr, &collections, nullptr); if (!collsStatus.isOK()) { return collsStatus; } @@ -273,7 +272,7 @@ 
StatusWith<MigrateInfoVector> BalancerChunkSelectionPolicyImpl::selectChunksToMo } auto candidatesStatus = - _getMigrateCandidatesForCollection(txn, nss, shardStats, aggressiveBalanceHint); + _getMigrateCandidatesForCollection(opCtx, nss, shardStats, aggressiveBalanceHint); if (candidatesStatus == ErrorCodes::NamespaceNotFound) { // Namespace got dropped before we managed to get to it, so just skip it continue; @@ -292,26 +291,25 @@ StatusWith<MigrateInfoVector> BalancerChunkSelectionPolicyImpl::selectChunksToMo } StatusWith<boost::optional<MigrateInfo>> -BalancerChunkSelectionPolicyImpl::selectSpecificChunkToMove(OperationContext* txn, +BalancerChunkSelectionPolicyImpl::selectSpecificChunkToMove(OperationContext* opCtx, const ChunkType& chunk) { - auto shardStatsStatus = _clusterStats->getStats(txn); + auto shardStatsStatus = _clusterStats->getStats(opCtx); if (!shardStatsStatus.isOK()) { return shardStatsStatus.getStatus(); } - const auto shardStats = std::move(shardStatsStatus.getValue()); - - const NamespaceString nss(chunk.getNS()); + const auto& shardStats = shardStatsStatus.getValue(); - auto scopedCMStatus = ScopedChunkManager::refreshAndGet(txn, nss); - if (!scopedCMStatus.isOK()) { - return scopedCMStatus.getStatus(); + auto routingInfoStatus = + Grid::get(opCtx)->catalogCache()->getShardedCollectionRoutingInfoWithRefresh(opCtx, + chunk.getNS()); + if (!routingInfoStatus.isOK()) { + return routingInfoStatus.getStatus(); } - const auto& scopedCM = scopedCMStatus.getValue(); - const auto cm = scopedCM.cm().get(); + const auto cm = routingInfoStatus.getValue().cm().get(); - const auto collInfoStatus = createCollectionDistributionStatus(txn, shardStats, cm); + const auto collInfoStatus = createCollectionDistributionStatus(opCtx, shardStats, cm); if (!collInfoStatus.isOK()) { return collInfoStatus.getStatus(); } @@ -321,27 +319,26 @@ BalancerChunkSelectionPolicyImpl::selectSpecificChunkToMove(OperationContext* tx return 
BalancerPolicy::balanceSingleChunk(chunk, shardStats, distribution); } -Status BalancerChunkSelectionPolicyImpl::checkMoveAllowed(OperationContext* txn, +Status BalancerChunkSelectionPolicyImpl::checkMoveAllowed(OperationContext* opCtx, const ChunkType& chunk, const ShardId& newShardId) { - auto shardStatsStatus = _clusterStats->getStats(txn); + auto shardStatsStatus = _clusterStats->getStats(opCtx); if (!shardStatsStatus.isOK()) { return shardStatsStatus.getStatus(); } auto shardStats = std::move(shardStatsStatus.getValue()); - const NamespaceString nss(chunk.getNS()); - - auto scopedCMStatus = ScopedChunkManager::refreshAndGet(txn, nss); - if (!scopedCMStatus.isOK()) { - return scopedCMStatus.getStatus(); + auto routingInfoStatus = + Grid::get(opCtx)->catalogCache()->getShardedCollectionRoutingInfoWithRefresh(opCtx, + chunk.getNS()); + if (!routingInfoStatus.isOK()) { + return routingInfoStatus.getStatus(); } - const auto& scopedCM = scopedCMStatus.getValue(); - const auto cm = scopedCM.cm().get(); + const auto cm = routingInfoStatus.getValue().cm().get(); - const auto collInfoStatus = createCollectionDistributionStatus(txn, shardStats, cm); + const auto collInfoStatus = createCollectionDistributionStatus(opCtx, shardStats, cm); if (!collInfoStatus.isOK()) { return collInfoStatus.getStatus(); } @@ -365,18 +362,18 @@ Status BalancerChunkSelectionPolicyImpl::checkMoveAllowed(OperationContext* txn, } StatusWith<SplitInfoVector> BalancerChunkSelectionPolicyImpl::_getSplitCandidatesForCollection( - OperationContext* txn, const NamespaceString& nss, const ShardStatisticsVector& shardStats) { - auto scopedCMStatus = ScopedChunkManager::refreshAndGet(txn, nss); - if (!scopedCMStatus.isOK()) { - return scopedCMStatus.getStatus(); + OperationContext* opCtx, const NamespaceString& nss, const ShardStatisticsVector& shardStats) { + auto routingInfoStatus = + Grid::get(opCtx)->catalogCache()->getShardedCollectionRoutingInfoWithRefresh(opCtx, nss); + if 
(!routingInfoStatus.isOK()) { + return routingInfoStatus.getStatus(); } - const auto& scopedCM = scopedCMStatus.getValue(); - const auto cm = scopedCM.cm().get(); + const auto cm = routingInfoStatus.getValue().cm().get(); const auto& shardKeyPattern = cm->getShardKeyPattern().getKeyPattern(); - const auto collInfoStatus = createCollectionDistributionStatus(txn, shardStats, cm); + const auto collInfoStatus = createCollectionDistributionStatus(opCtx, shardStats, cm); if (!collInfoStatus.isOK()) { return collInfoStatus.getStatus(); } @@ -416,21 +413,21 @@ StatusWith<SplitInfoVector> BalancerChunkSelectionPolicyImpl::_getSplitCandidate } StatusWith<MigrateInfoVector> BalancerChunkSelectionPolicyImpl::_getMigrateCandidatesForCollection( - OperationContext* txn, + OperationContext* opCtx, const NamespaceString& nss, const ShardStatisticsVector& shardStats, bool aggressiveBalanceHint) { - auto scopedCMStatus = ScopedChunkManager::refreshAndGet(txn, nss); - if (!scopedCMStatus.isOK()) { - return scopedCMStatus.getStatus(); + auto routingInfoStatus = + Grid::get(opCtx)->catalogCache()->getShardedCollectionRoutingInfoWithRefresh(opCtx, nss); + if (!routingInfoStatus.isOK()) { + return routingInfoStatus.getStatus(); } - const auto& scopedCM = scopedCMStatus.getValue(); - const auto cm = scopedCM.cm().get(); + const auto cm = routingInfoStatus.getValue().cm().get(); const auto& shardKeyPattern = cm->getShardKeyPattern().getKeyPattern(); - const auto collInfoStatus = createCollectionDistributionStatus(txn, shardStats, cm); + const auto collInfoStatus = createCollectionDistributionStatus(opCtx, shardStats, cm); if (!collInfoStatus.isOK()) { return collInfoStatus.getStatus(); } diff --git a/src/mongo/db/s/balancer/migration_manager.cpp b/src/mongo/db/s/balancer/migration_manager.cpp index cbf269de1a8..2191a71e7a5 100644 --- a/src/mongo/db/s/balancer/migration_manager.cpp +++ b/src/mongo/db/s/balancer/migration_manager.cpp @@ -43,10 +43,10 @@ #include 
"mongo/executor/task_executor_pool.h" #include "mongo/rpc/get_status_from_command_result.h" #include "mongo/s/catalog/sharding_catalog_client.h" +#include "mongo/s/catalog_cache.h" #include "mongo/s/client/shard_registry.h" #include "mongo/s/grid.h" #include "mongo/s/move_chunk_request.h" -#include "mongo/s/sharding_raii.h" #include "mongo/util/log.h" #include "mongo/util/net/hostandport.h" #include "mongo/util/scopeguard.h" @@ -88,14 +88,14 @@ Status extractMigrationStatusFromCommandResponse(const BSONObj& commandResponse) /** * Blocking call to acquire the distributed collection lock for the specified namespace. */ -StatusWith<DistLockHandle> acquireDistLock(OperationContext* txn, +StatusWith<DistLockHandle> acquireDistLock(OperationContext* opCtx, const OID& lockSessionID, const NamespaceString& nss) { const std::string whyMessage(stream() << "Migrating chunk(s) in collection " << nss.ns()); auto statusWithDistLockHandle = - Grid::get(txn)->catalogClient(txn)->getDistLockManager()->lockWithSessionID( - txn, nss.ns(), whyMessage, lockSessionID, DistLockManager::kSingleLockAttemptTimeout); + Grid::get(opCtx)->catalogClient(opCtx)->getDistLockManager()->lockWithSessionID( + opCtx, nss.ns(), whyMessage, lockSessionID, DistLockManager::kSingleLockAttemptTimeout); if (!statusWithDistLockHandle.isOK()) { // If we get LockBusy while trying to acquire the collection distributed lock, this implies @@ -138,7 +138,7 @@ MigrationManager::~MigrationManager() { } MigrationStatuses MigrationManager::executeMigrationsForAutoBalance( - OperationContext* txn, + OperationContext* opCtx, const vector<MigrateInfo>& migrateInfos, uint64_t maxChunkSizeBytes, const MigrationSecondaryThrottleOptions& secondaryThrottle, @@ -156,7 +156,7 @@ MigrationStatuses MigrationManager::executeMigrationsForAutoBalance( // Write a document to the config.migrations collection, in case this migration must be // recovered by the Balancer. Fail if the chunk is already moving. 
auto statusWithScopedMigrationRequest = - ScopedMigrationRequest::writeMigration(txn, migrateInfo, waitForDelete); + ScopedMigrationRequest::writeMigration(opCtx, migrateInfo, waitForDelete); if (!statusWithScopedMigrationRequest.isOK()) { migrationStatuses.emplace(migrateInfo.getName(), std::move(statusWithScopedMigrationRequest.getStatus())); @@ -165,7 +165,7 @@ MigrationStatuses MigrationManager::executeMigrationsForAutoBalance( scopedMigrationRequests.emplace(migrateInfo.getName(), std::move(statusWithScopedMigrationRequest.getValue())); - responses.emplace_back(_schedule(txn, + responses.emplace_back(_schedule(opCtx, migrateInfo, false, // Config server takes the collection dist lock maxChunkSizeBytes, @@ -200,7 +200,7 @@ MigrationStatuses MigrationManager::executeMigrationsForAutoBalance( // Write a document to the config.migrations collection, in case this migration must be // recovered by the Balancer. Fail if the chunk is already moving. auto statusWithScopedMigrationRequest = - ScopedMigrationRequest::writeMigration(txn, migrateInfo, waitForDelete); + ScopedMigrationRequest::writeMigration(opCtx, migrateInfo, waitForDelete); if (!statusWithScopedMigrationRequest.isOK()) { migrationStatuses.emplace(migrateInfo.getName(), std::move(statusWithScopedMigrationRequest.getStatus())); @@ -208,7 +208,7 @@ MigrationStatuses MigrationManager::executeMigrationsForAutoBalance( } RemoteCommandResponse remoteCommandResponse = - _schedule(txn, + _schedule(opCtx, migrateInfo, true, // Shard takes the collection dist lock maxChunkSizeBytes, @@ -228,7 +228,7 @@ MigrationStatuses MigrationManager::executeMigrationsForAutoBalance( } Status MigrationManager::executeManualMigration( - OperationContext* txn, + OperationContext* opCtx, const MigrateInfo& migrateInfo, uint64_t maxChunkSizeBytes, const MigrationSecondaryThrottleOptions& secondaryThrottle, @@ -238,28 +238,25 @@ Status MigrationManager::executeManualMigration( // Write a document to the config.migrations collection, 
in case this migration must be // recovered by the Balancer. Fail if the chunk is already moving. auto statusWithScopedMigrationRequest = - ScopedMigrationRequest::writeMigration(txn, migrateInfo, waitForDelete); + ScopedMigrationRequest::writeMigration(opCtx, migrateInfo, waitForDelete); if (!statusWithScopedMigrationRequest.isOK()) { return statusWithScopedMigrationRequest.getStatus(); } RemoteCommandResponse remoteCommandResponse = - _schedule(txn, - migrateInfo, - false, // Config server takes the collection dist lock - maxChunkSizeBytes, - secondaryThrottle, - waitForDelete) + _schedule(opCtx, migrateInfo, false, maxChunkSizeBytes, secondaryThrottle, waitForDelete) ->get(); - auto scopedCMStatus = ScopedChunkManager::refreshAndGet(txn, NamespaceString(migrateInfo.ns)); - if (!scopedCMStatus.isOK()) { - return scopedCMStatus.getStatus(); + auto routingInfoStatus = + Grid::get(opCtx)->catalogCache()->getShardedCollectionRoutingInfoWithRefresh( + opCtx, migrateInfo.ns); + if (!routingInfoStatus.isOK()) { + return routingInfoStatus.getStatus(); } - const auto& scopedCM = scopedCMStatus.getValue(); + auto& routingInfo = routingInfoStatus.getValue(); - auto chunk = scopedCM.cm()->findIntersectingChunkWithSimpleCollation(migrateInfo.minKey); + auto chunk = routingInfo.cm()->findIntersectingChunkWithSimpleCollation(migrateInfo.minKey); invariant(chunk); Status commandStatus = _processRemoteCommandResponse( @@ -276,7 +273,7 @@ Status MigrationManager::executeManualMigration( return commandStatus; } -void MigrationManager::startRecoveryAndAcquireDistLocks(OperationContext* txn) { +void MigrationManager::startRecoveryAndAcquireDistLocks(OperationContext* opCtx) { { stdx::lock_guard<stdx::mutex> lock(_mutex); invariant(_state == State::kStopped); @@ -286,14 +283,14 @@ void MigrationManager::startRecoveryAndAcquireDistLocks(OperationContext* txn) { auto scopedGuard = MakeGuard([&] { _migrationRecoveryMap.clear(); - _abandonActiveMigrationsAndEnableManager(txn); + 
_abandonActiveMigrationsAndEnableManager(opCtx); }); - auto distLockManager = Grid::get(txn)->catalogClient(txn)->getDistLockManager(); + auto distLockManager = Grid::get(opCtx)->catalogClient(opCtx)->getDistLockManager(); // Must claim the balancer lock to prevent any 3.2 mongos clients from acquiring it. auto balancerLockStatus = distLockManager->tryLockWithLocalWriteConcern( - txn, "balancer", "CSRS Balancer", _lockSessionID); + opCtx, "balancer", "CSRS Balancer", _lockSessionID); if (!balancerLockStatus.isOK()) { log() << "Failed to acquire balancer distributed lock. Abandoning balancer recovery." << causedBy(redact(balancerLockStatus.getStatus())); @@ -302,8 +299,8 @@ void MigrationManager::startRecoveryAndAcquireDistLocks(OperationContext* txn) { // Load the active migrations from the config.migrations collection. auto statusWithMigrationsQueryResponse = - Grid::get(txn)->shardRegistry()->getConfigShard()->exhaustiveFindOnConfig( - txn, + Grid::get(opCtx)->shardRegistry()->getConfigShard()->exhaustiveFindOnConfig( + opCtx, ReadPreferenceSetting{ReadPreference::PrimaryOnly}, repl::ReadConcernLevel::kLocalReadConcern, NamespaceString(MigrationType::ConfigNS), @@ -341,7 +338,7 @@ void MigrationManager::startRecoveryAndAcquireDistLocks(OperationContext* txn) { << migrateType.getNss().ns()); auto statusWithDistLockHandle = distLockManager->tryLockWithLocalWriteConcern( - txn, migrateType.getNss().ns(), whyMessage, _lockSessionID); + opCtx, migrateType.getNss().ns(), whyMessage, _lockSessionID); if (!statusWithDistLockHandle.isOK() && statusWithDistLockHandle.getStatus() != ErrorCodes::LockBusy) { // LockBusy is alright because that should mean a 3.2 shard has it for the active @@ -361,7 +358,7 @@ void MigrationManager::startRecoveryAndAcquireDistLocks(OperationContext* txn) { scopedGuard.Dismiss(); } -void MigrationManager::finishRecovery(OperationContext* txn, +void MigrationManager::finishRecovery(OperationContext* opCtx, uint64_t maxChunkSizeBytes, const 
MigrationSecondaryThrottleOptions& secondaryThrottle) { { @@ -382,7 +379,7 @@ void MigrationManager::finishRecovery(OperationContext* txn, auto scopedGuard = MakeGuard([&] { _migrationRecoveryMap.clear(); - _abandonActiveMigrationsAndEnableManager(txn); + _abandonActiveMigrationsAndEnableManager(opCtx); }); // Schedule recovered migrations. @@ -394,18 +391,20 @@ void MigrationManager::finishRecovery(OperationContext* txn, auto& migrateInfos = nssAndMigrateInfos.second; invariant(!migrateInfos.empty()); - auto scopedCMStatus = ScopedChunkManager::refreshAndGet(txn, nss); - if (!scopedCMStatus.isOK()) { + auto routingInfoStatus = + Grid::get(opCtx)->catalogCache()->getShardedCollectionRoutingInfoWithRefresh(opCtx, + nss); + if (!routingInfoStatus.isOK()) { // This shouldn't happen because the collection was intact and sharded when the previous // config primary was active and the dist locks have been held by the balancer // throughout. Abort migration recovery. log() << "Unable to reload chunk metadata for collection '" << nss << "' during balancer recovery. Abandoning recovery." - << causedBy(redact(scopedCMStatus.getStatus())); + << causedBy(redact(routingInfoStatus.getStatus())); return; } - const auto& scopedCM = scopedCMStatus.getValue(); + auto& routingInfo = routingInfoStatus.getValue(); int scheduledMigrations = 0; @@ -416,22 +415,22 @@ void MigrationManager::finishRecovery(OperationContext* txn, migrateInfos.pop_front(); auto chunk = - scopedCM.cm()->findIntersectingChunkWithSimpleCollation(migrationInfo.minKey); + routingInfo.cm()->findIntersectingChunkWithSimpleCollation(migrationInfo.minKey); invariant(chunk); if (chunk->getShardId() != migrationInfo.from) { // Chunk is no longer on the source shard specified by this migration. Erase the // migration recovery document associated with it. 
- ScopedMigrationRequest::createForRecovery(txn, nss, migrationInfo.minKey); + ScopedMigrationRequest::createForRecovery(opCtx, nss, migrationInfo.minKey); continue; } scopedMigrationRequests.emplace_back( - ScopedMigrationRequest::createForRecovery(txn, nss, migrationInfo.minKey)); + ScopedMigrationRequest::createForRecovery(opCtx, nss, migrationInfo.minKey)); scheduledMigrations++; - responses.emplace_back(_schedule(txn, + responses.emplace_back(_schedule(opCtx, migrationInfo, false, // Config server takes the collection dist lock maxChunkSizeBytes, @@ -441,8 +440,8 @@ void MigrationManager::finishRecovery(OperationContext* txn, // If no migrations were scheduled for this namespace, free the dist lock if (!scheduledMigrations) { - Grid::get(txn)->catalogClient(txn)->getDistLockManager()->unlock( - txn, _lockSessionID, nss.ns()); + Grid::get(opCtx)->catalogClient(opCtx)->getDistLockManager()->unlock( + opCtx, _lockSessionID, nss.ns()); } } @@ -507,7 +506,7 @@ void MigrationManager::drainActiveMigrations() { } shared_ptr<Notification<RemoteCommandResponse>> MigrationManager::_schedule( - OperationContext* txn, + OperationContext* opCtx, const MigrateInfo& migrateInfo, bool shardTakesCollectionDistLock, uint64_t maxChunkSizeBytes, @@ -525,16 +524,16 @@ shared_ptr<Notification<RemoteCommandResponse>> MigrationManager::_schedule( } } - // Sanity checks that the chunk being migrated is actually valid. These will be repeated at the // shard as well, but doing them here saves an extra network call, which might otherwise fail. 
- auto statusWithScopedChunkManager = ScopedChunkManager::refreshAndGet(txn, nss); - if (!statusWithScopedChunkManager.isOK()) { + auto routingInfoStatus = + Grid::get(opCtx)->catalogCache()->getShardedCollectionRoutingInfoWithRefresh(opCtx, nss); + if (!routingInfoStatus.isOK()) { return std::make_shared<Notification<RemoteCommandResponse>>( - std::move(statusWithScopedChunkManager.getStatus())); + std::move(routingInfoStatus.getStatus())); } - auto const chunkManager = statusWithScopedChunkManager.getValue().cm(); + auto const chunkManager = routingInfoStatus.getValue().cm(); auto chunk = chunkManager->findIntersectingChunkWithSimpleCollation(migrateInfo.minKey); invariant(chunk); @@ -548,15 +547,16 @@ shared_ptr<Notification<RemoteCommandResponse>> MigrationManager::_schedule( << " does not exist.")); } - const auto fromShardStatus = Grid::get(txn)->shardRegistry()->getShard(txn, migrateInfo.from); + const auto fromShardStatus = + Grid::get(opCtx)->shardRegistry()->getShard(opCtx, migrateInfo.from); if (!fromShardStatus.isOK()) { return std::make_shared<Notification<RemoteCommandResponse>>( std::move(fromShardStatus.getStatus())); } const auto fromShard = fromShardStatus.getValue(); - auto fromHostStatus = - fromShard->getTargeter()->findHost(txn, ReadPreferenceSetting{ReadPreference::PrimaryOnly}); + auto fromHostStatus = fromShard->getTargeter()->findHost( + opCtx, ReadPreferenceSetting{ReadPreference::PrimaryOnly}); if (!fromHostStatus.isOK()) { return std::make_shared<Notification<RemoteCommandResponse>>( std::move(fromHostStatus.getStatus())); @@ -567,7 +567,7 @@ shared_ptr<Notification<RemoteCommandResponse>> MigrationManager::_schedule( &builder, nss, chunkManager->getVersion(), - Grid::get(txn)->shardRegistry()->getConfigServerConnectionString(), + Grid::get(opCtx)->shardRegistry()->getConfigServerConnectionString(), migrateInfo.from, migrateInfo.to, ChunkRange(migrateInfo.minKey, migrateInfo.maxKey), @@ -590,25 +590,26 @@ 
shared_ptr<Notification<RemoteCommandResponse>> MigrationManager::_schedule( auto retVal = migration.completionNotification; if (shardTakesCollectionDistLock) { - _scheduleWithoutDistLock_inlock(txn, fromHostStatus.getValue(), std::move(migration)); + _scheduleWithoutDistLock_inlock(opCtx, fromHostStatus.getValue(), std::move(migration)); } else { - _scheduleWithDistLock_inlock(txn, fromHostStatus.getValue(), std::move(migration)); + _scheduleWithDistLock_inlock(opCtx, fromHostStatus.getValue(), std::move(migration)); } return retVal; } -void MigrationManager::_scheduleWithDistLock_inlock(OperationContext* txn, +void MigrationManager::_scheduleWithDistLock_inlock(OperationContext* opCtx, const HostAndPort& targetHost, Migration migration) { - executor::TaskExecutor* const executor = Grid::get(txn)->getExecutorPool()->getFixedExecutor(); + executor::TaskExecutor* const executor = + Grid::get(opCtx)->getExecutorPool()->getFixedExecutor(); const NamespaceString nss(migration.nss); auto it = _activeMigrationsWithDistLock.find(nss); if (it == _activeMigrationsWithDistLock.end()) { // Acquire the collection distributed lock (blocking call) - auto distLockHandleStatus = acquireDistLock(txn, _lockSessionID, nss); + auto distLockHandleStatus = acquireDistLock(opCtx, _lockSessionID, nss); if (!distLockHandleStatus.isOK()) { migration.completionNotification->set(distLockHandleStatus.getStatus()); return; @@ -627,7 +628,7 @@ void MigrationManager::_scheduleWithDistLock_inlock(OperationContext* txn, auto itMigration = collectionMigrationState->migrations.begin(); const RemoteCommandRequest remoteRequest( - targetHost, NamespaceString::kAdminDb.toString(), itMigration->moveChunkCmdObj, txn); + targetHost, NamespaceString::kAdminDb.toString(), itMigration->moveChunkCmdObj, opCtx); StatusWith<executor::TaskExecutor::CallbackHandle> callbackHandleWithStatus = executor->scheduleRemoteCommand( @@ -636,10 +637,10 @@ void MigrationManager::_scheduleWithDistLock_inlock(OperationContext* 
txn, const executor::TaskExecutor::RemoteCommandCallbackArgs& args) { Client::initThread(getThreadName().c_str()); ON_BLOCK_EXIT([&] { Client::destroy(); }); - auto txn = cc().makeOperationContext(); + auto opCtx = cc().makeOperationContext(); stdx::lock_guard<stdx::mutex> lock(_mutex); - _completeWithDistLock_inlock(txn.get(), itMigration, args.response); + _completeWithDistLock_inlock(opCtx.get(), itMigration, args.response); }); if (callbackHandleWithStatus.isOK()) { @@ -647,11 +648,12 @@ void MigrationManager::_scheduleWithDistLock_inlock(OperationContext* txn, return; } - _completeWithDistLock_inlock(txn, itMigration, std::move(callbackHandleWithStatus.getStatus())); + _completeWithDistLock_inlock( + opCtx, itMigration, std::move(callbackHandleWithStatus.getStatus())); } void MigrationManager::_completeWithDistLock_inlock( - OperationContext* txn, + OperationContext* opCtx, MigrationsList::iterator itMigration, const RemoteCommandResponse& remoteCommandResponse) { const NamespaceString nss(itMigration->nss); @@ -668,8 +670,8 @@ void MigrationManager::_completeWithDistLock_inlock( collectionMigrationState->migrations.erase(itMigration); if (collectionMigrationState->migrations.empty()) { - Grid::get(txn)->catalogClient(txn)->getDistLockManager()->unlock( - txn, collectionMigrationState->distLockHandle, nss.ns()); + Grid::get(opCtx)->catalogClient(opCtx)->getDistLockManager()->unlock( + opCtx, collectionMigrationState->distLockHandle, nss.ns()); _activeMigrationsWithDistLock.erase(it); _checkDrained_inlock(); } @@ -677,16 +679,17 @@ void MigrationManager::_completeWithDistLock_inlock( notificationToSignal->set(remoteCommandResponse); } -void MigrationManager::_scheduleWithoutDistLock_inlock(OperationContext* txn, +void MigrationManager::_scheduleWithoutDistLock_inlock(OperationContext* opCtx, const HostAndPort& targetHost, Migration migration) { - executor::TaskExecutor* const executor = Grid::get(txn)->getExecutorPool()->getFixedExecutor(); + 
executor::TaskExecutor* const executor = + Grid::get(opCtx)->getExecutorPool()->getFixedExecutor(); _activeMigrationsWithoutDistLock.push_front(std::move(migration)); auto itMigration = _activeMigrationsWithoutDistLock.begin(); const RemoteCommandRequest remoteRequest( - targetHost, NamespaceString::kAdminDb.toString(), itMigration->moveChunkCmdObj, txn); + targetHost, NamespaceString::kAdminDb.toString(), itMigration->moveChunkCmdObj, opCtx); StatusWith<executor::TaskExecutor::CallbackHandle> callbackHandleWithStatus = executor->scheduleRemoteCommand( @@ -731,7 +734,7 @@ void MigrationManager::_waitForRecovery() { _condVar.wait(lock, [this] { return _state != State::kRecovering; }); } -void MigrationManager::_abandonActiveMigrationsAndEnableManager(OperationContext* txn) { +void MigrationManager::_abandonActiveMigrationsAndEnableManager(OperationContext* opCtx) { stdx::unique_lock<stdx::mutex> lock(_mutex); if (_state == State::kStopping) { // The balancer was interrupted. Let the next balancer recover the state. @@ -739,16 +742,16 @@ void MigrationManager::_abandonActiveMigrationsAndEnableManager(OperationContext } invariant(_state == State::kRecovering); - auto catalogClient = Grid::get(txn)->catalogClient(txn); + auto catalogClient = Grid::get(opCtx)->catalogClient(opCtx); // Unlock all balancer distlocks we aren't using anymore. auto distLockManager = catalogClient->getDistLockManager(); - distLockManager->unlockAll(txn, distLockManager->getProcessID()); + distLockManager->unlockAll(opCtx, distLockManager->getProcessID()); // Clear the config.migrations collection so that those chunks can be scheduled for migration // again. 
catalogClient->removeConfigDocuments( - txn, MigrationType::ConfigNS, BSONObj(), kMajorityWriteConcern); + opCtx, MigrationType::ConfigNS, BSONObj(), kMajorityWriteConcern); _state = State::kEnabled; _condVar.notify_all(); @@ -799,12 +802,15 @@ Status MigrationManager::_processRemoteCommandResponse( return commandStatus; } -Status MigrationManager::tryTakeBalancerLock(OperationContext* txn, StringData whyMessage) { - return Grid::get(txn) - ->catalogClient(txn) +Status MigrationManager::tryTakeBalancerLock(OperationContext* opCtx, StringData whyMessage) { + return Grid::get(opCtx) + ->catalogClient(opCtx) ->getDistLockManager() - ->lockWithSessionID( - txn, "balancer", whyMessage, _lockSessionID, DistLockManager::kSingleLockAttemptTimeout) + ->lockWithSessionID(opCtx, + "balancer", + whyMessage, + _lockSessionID, + DistLockManager::kSingleLockAttemptTimeout) .getStatus(); } diff --git a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp index 30c69a6f5ef..d503e3aeaa0 100644 --- a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp +++ b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp @@ -48,7 +48,6 @@ #include "mongo/executor/task_executor.h" #include "mongo/executor/task_executor_pool.h" #include "mongo/rpc/get_status_from_command_result.h" -#include "mongo/s/chunk.h" #include "mongo/s/client/shard_registry.h" #include "mongo/s/grid.h" #include "mongo/util/elapsed_tracker.h" diff --git a/src/mongo/db/s/split_vector_command.cpp b/src/mongo/db/s/split_vector_command.cpp index f02a7d68fa3..ba4c126680b 100644 --- a/src/mongo/db/s/split_vector_command.cpp +++ b/src/mongo/db/s/split_vector_command.cpp @@ -50,7 +50,6 @@ #include "mongo/db/keypattern.h" #include "mongo/db/query/internal_plans.h" #include "mongo/s/catalog/type_chunk.h" -#include "mongo/s/chunk.h" #include "mongo/util/log.h" #include "mongo/util/timer.h" diff --git a/src/mongo/s/SConscript b/src/mongo/s/SConscript index 
492a7c01431..2440b3fcb69 100644 --- a/src/mongo/s/SConscript +++ b/src/mongo/s/SConscript @@ -213,12 +213,10 @@ env.Library( 'chunk.cpp', 'chunk_manager.cpp', 'cluster_identity_loader.cpp', - 'config.cpp', 'config_server_client.cpp', 'grid.cpp', 'shard_util.cpp', 'sharding_egress_metadata_hook.cpp', - 'sharding_raii.cpp', ], LIBDEPS=[ '$BUILD_DIR/mongo/db/audit', @@ -235,8 +233,10 @@ env.Library( env.CppUnitTest( target='catalog_cache_test', source=[ - 'chunk_manager_test.cpp', 'chunk_manager_index_bounds_test.cpp', + 'chunk_manager_query_test.cpp', + 'chunk_manager_refresh_test.cpp', + 'chunk_manager_test_fixture.cpp', ], LIBDEPS=[ '$BUILD_DIR/mongo/s/catalog/sharding_catalog_test_fixture', diff --git a/src/mongo/s/catalog/type_chunk.cpp b/src/mongo/s/catalog/type_chunk.cpp index e0cd4c1215a..5965644ce41 100644 --- a/src/mongo/s/catalog/type_chunk.cpp +++ b/src/mongo/s/catalog/type_chunk.cpp @@ -122,6 +122,15 @@ bool ChunkRange::operator!=(const ChunkRange& other) const { return !(*this == other); } +ChunkType::ChunkType() = default; + +ChunkType::ChunkType(NamespaceString nss, ChunkRange range, ChunkVersion version, ShardId shardId) + : _ns(nss.ns()), + _min(range.getMin()), + _max(range.getMax()), + _version(version), + _shard(std::move(shardId)) {} + StatusWith<ChunkType> ChunkType::fromBSON(const BSONObj& source) { ChunkType chunk; diff --git a/src/mongo/s/catalog/type_chunk.h b/src/mongo/s/catalog/type_chunk.h index de468e21f79..40a8effece1 100644 --- a/src/mongo/s/catalog/type_chunk.h +++ b/src/mongo/s/catalog/type_chunk.h @@ -32,6 +32,7 @@ #include <string> #include "mongo/bson/bsonobj.h" +#include "mongo/db/namespace_string.h" #include "mongo/s/chunk_version.h" #include "mongo/s/shard_id.h" @@ -107,6 +108,9 @@ public: static const BSONField<Date_t> DEPRECATED_lastmod; static const BSONField<OID> DEPRECATED_epoch; + ChunkType(); + ChunkType(NamespaceString nss, ChunkRange range, ChunkVersion version, ShardId shardId); + /** * Constructs a new ChunkType 
object from BSON. * Also does validation of the contents. diff --git a/src/mongo/s/catalog_cache.cpp b/src/mongo/s/catalog_cache.cpp index b8cec8396de..7482c8d27c7 100644 --- a/src/mongo/s/catalog_cache.cpp +++ b/src/mongo/s/catalog_cache.cpp @@ -26,64 +26,395 @@ * it in the license file. */ +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding + #include "mongo/platform/basic.h" #include "mongo/s/catalog_cache.h" +#include "mongo/base/status.h" #include "mongo/base/status_with.h" +#include "mongo/db/operation_context.h" +#include "mongo/db/query/collation/collator_factory_interface.h" +#include "mongo/db/repl/optime_with.h" #include "mongo/s/catalog/sharding_catalog_client.h" +#include "mongo/s/catalog/type_collection.h" #include "mongo/s/catalog/type_database.h" -#include "mongo/s/config.h" +#include "mongo/s/chunk_diff.h" +#include "mongo/s/client/shard_registry.h" #include "mongo/s/grid.h" +#include "mongo/stdx/memory.h" +#include "mongo/util/log.h" +#include "mongo/util/timer.h" namespace mongo { +namespace { -using std::shared_ptr; -using std::string; +// How many times to try refreshing the routing info if the set of chunks loaded from the config +// server is found to be inconsistent. +const int kMaxInconsistentRoutingInfoRefreshAttempts = 3; -CatalogCache::CatalogCache() = default; +/** + * This is an adapter so we can use config diffs - mongos and mongod do them slightly differently. + * + * The mongos adapter here tracks all shards, and stores ranges by (max, Chunk) in the map. 
+ */ +class CMConfigDiffTracker : public ConfigDiffTracker<std::shared_ptr<Chunk>> { +public: + CMConfigDiffTracker(const NamespaceString& nss, + RangeMap* currMap, + ChunkVersion* maxVersion, + MaxChunkVersionMap* maxShardVersions) + : ConfigDiffTracker<std::shared_ptr<Chunk>>( + nss.ns(), currMap, maxVersion, maxShardVersions) {} -CatalogCache::~CatalogCache() = default; + bool isTracked(const ChunkType& chunk) const final { + // Mongos tracks all shards + return true; + } -StatusWith<std::shared_ptr<DBConfig>> CatalogCache::getDatabase(OperationContext* txn, - StringData dbName) { - stdx::lock_guard<stdx::mutex> guard(_mutex); + bool isMinKeyIndexed() const final { + return false; + } - auto it = _databases.find(dbName); - if (it != _databases.end()) { - return it->second; + std::pair<BSONObj, std::shared_ptr<Chunk>> rangeFor(OperationContext* opCtx, + const ChunkType& chunk) const final { + return std::make_pair(chunk.getMax(), std::make_shared<Chunk>(chunk)); } - // Need to load from the store - auto status = Grid::get(txn)->catalogClient(txn)->getDatabase(txn, dbName.toString()); - if (!status.isOK()) { - return status.getStatus(); + ShardId shardFor(OperationContext* opCtx, const ShardId& shardId) const final { + const auto shard = + uassertStatusOK(Grid::get(opCtx)->shardRegistry()->getShard(opCtx, shardId)); + return shard->getId(); } +}; + +} // namespace + +CatalogCache::CatalogCache() = default; + +CatalogCache::~CatalogCache() = default; + +StatusWith<CachedDatabaseInfo> CatalogCache::getDatabase(OperationContext* opCtx, + StringData dbName) { + stdx::lock_guard<stdx::mutex> lg(_mutex); - const auto& dbOpTimePair = status.getValue(); - auto db = std::make_shared<DBConfig>(dbOpTimePair.value, dbOpTimePair.opTime); try { - db->load(txn); - auto emplaceResult = _databases.try_emplace(dbName, std::move(db)); - return emplaceResult.first->second; + return {CachedDatabaseInfo(_getDatabase_inlock(opCtx, dbName))}; } catch (const DBException& ex) { return 
ex.toStatus(); } } -void CatalogCache::invalidate(StringData dbName) { - stdx::lock_guard<stdx::mutex> guard(_mutex); +StatusWith<CachedCollectionRoutingInfo> CatalogCache::getCollectionRoutingInfo( + OperationContext* opCtx, const NamespaceString& nss) { + int numRefreshAttempts = 0; - ShardedDatabasesMap::iterator it = _databases.find(dbName); - if (it != _databases.end()) { - _databases.erase(it); + while (true) { + stdx::unique_lock<stdx::mutex> ul(_mutex); + + std::shared_ptr<DatabaseInfoEntry> dbEntry; + try { + dbEntry = _getDatabase_inlock(opCtx, nss.db()); + } catch (const DBException& ex) { + return ex.toStatus(); + } + + auto& collections = dbEntry->collections; + + auto it = collections.find(nss.ns()); + if (it == collections.end()) { + auto shardStatus = + Grid::get(opCtx)->shardRegistry()->getShard(opCtx, dbEntry->primaryShardId); + if (!shardStatus.isOK()) { + return {ErrorCodes::fromInt(40371), + str::stream() << "The primary shard for collection " << nss.ns() + << " could not be loaded due to error " + << shardStatus.getStatus().toString()}; + } + + return {CachedCollectionRoutingInfo( + dbEntry->primaryShardId, nss, std::move(shardStatus.getValue()))}; + } + + auto& collEntry = it->second; + + if (collEntry.needsRefresh) { + numRefreshAttempts++; + + try { + auto newRoutingInfo = + refreshCollectionRoutingInfo(opCtx, nss, std::move(collEntry.routingInfo)); + if (newRoutingInfo == nullptr) { + collections.erase(it); + + // Loop around so we can return an "unsharded" routing info + continue; + } + + collEntry.routingInfo = std::move(newRoutingInfo); + collEntry.needsRefresh = false; + } catch (const DBException& ex) { + // It is possible that the metadata is being changed concurrently, so retry the + // refresh with a wait + if (ex.getCode() == ErrorCodes::ConflictingOperationInProgress && + numRefreshAttempts < kMaxInconsistentRoutingInfoRefreshAttempts) { + ul.unlock(); + + log() << "Metadata refresh for " << nss.ns() << " failed and will be 
retried" + << causedBy(redact(ex)); + + // Do the sleep outside of the mutex + sleepFor(Milliseconds(10) * numRefreshAttempts); + continue; + } + + return ex.toStatus(); + } + } + + return {CachedCollectionRoutingInfo(dbEntry->primaryShardId, collEntry.routingInfo)}; + } +} + +StatusWith<CachedCollectionRoutingInfo> CatalogCache::getCollectionRoutingInfo( + OperationContext* opCtx, StringData ns) { + return getCollectionRoutingInfo(opCtx, NamespaceString(ns)); +} + +StatusWith<CachedCollectionRoutingInfo> CatalogCache::getShardedCollectionRoutingInfoWithRefresh( + OperationContext* opCtx, const NamespaceString& nss) { + invalidateShardedCollection(nss); + + auto routingInfoStatus = getCollectionRoutingInfo(opCtx, nss); + if (routingInfoStatus.isOK() && !routingInfoStatus.getValue().cm()) { + return {ErrorCodes::NamespaceNotSharded, + str::stream() << "Collection " << nss.ns() << " is not sharded."}; } + + return routingInfoStatus; } -void CatalogCache::invalidateAll() { - stdx::lock_guard<stdx::mutex> guard(_mutex); +StatusWith<CachedCollectionRoutingInfo> CatalogCache::getShardedCollectionRoutingInfoWithRefresh( + OperationContext* opCtx, StringData ns) { + return getShardedCollectionRoutingInfoWithRefresh(opCtx, NamespaceString(ns)); +} + +void CatalogCache::onStaleConfigError(CachedCollectionRoutingInfo&& ccrt) { + if (!ccrt._cm) { + // Here we received a stale config error for a collection which we previously thought was + // unsharded. + invalidateShardedCollection(ccrt._nss); + return; + } + // Here we received a stale config error for a collection which we previously thought was sharded + stdx::lock_guard<stdx::mutex> lg(_mutex); + + auto it = _databases.find(NamespaceString(ccrt._cm->getns()).db()); + if (it == _databases.end()) { + // If the database does not exist, the collection must have been dropped so there is + // nothing to invalidate. The getCollectionRoutingInfo will handle the reload of the + // entire database and its collections. 
+ return; + } + + auto& collections = it->second->collections; + + auto itColl = collections.find(ccrt._cm->getns()); + if (itColl == collections.end()) { + // If the collection does not exist, this means it must have been dropped since the last + // time we retrieved a cache entry for it. Doing nothing in this case will cause the + // next call to getCollectionRoutingInfo to return an unsharded collection. + return; + } else if (itColl->second.needsRefresh) { + // Refresh has been scheduled for the collection already + return; + } else if (itColl->second.routingInfo->getVersion() == ccrt._cm->getVersion()) { + // If the versions match, the last version of the routing information that we used is no + // longer valid, so trigger a refresh. + itColl->second.needsRefresh = true; + } +} + +void CatalogCache::invalidateShardedCollection(const NamespaceString& nss) { + stdx::lock_guard<stdx::mutex> lg(_mutex); + + auto it = _databases.find(nss.db()); + if (it == _databases.end()) { + return; + } + + it->second->collections[nss.ns()].needsRefresh = true; +} + +void CatalogCache::invalidateShardedCollection(StringData ns) { + invalidateShardedCollection(NamespaceString(ns)); +} + +void CatalogCache::purgeDatabase(StringData dbName) { + stdx::lock_guard<stdx::mutex> lg(_mutex); + + auto it = _databases.find(dbName); + if (it == _databases.end()) { + return; + } + + _databases.erase(it); +} + +void CatalogCache::purgeAllDatabases() { + stdx::lock_guard<stdx::mutex> lg(_mutex); _databases.clear(); } +std::shared_ptr<ChunkManager> CatalogCache::refreshCollectionRoutingInfo( + OperationContext* opCtx, + const NamespaceString& nss, + std::shared_ptr<ChunkManager> existingRoutingInfo) { + Timer t; + + const auto catalogClient = Grid::get(opCtx)->catalogClient(opCtx); + + // Decide whether to do a full or partial load based on the state of the collection + auto collStatus = catalogClient->getCollection(opCtx, nss.ns()); + if (collStatus == ErrorCodes::NamespaceNotFound) { + return 
nullptr; + } + + const auto coll = uassertStatusOK(std::move(collStatus)).value; + if (coll.getDropped()) { + return nullptr; + } + + ChunkVersion startingCollectionVersion; + ChunkMap chunkMap = + SimpleBSONObjComparator::kInstance.makeBSONObjIndexedMap<std::shared_ptr<Chunk>>(); + + if (!existingRoutingInfo) { + // If we don't have a basis chunk manager, do a full refresh + startingCollectionVersion = ChunkVersion(0, 0, coll.getEpoch()); + } else if (existingRoutingInfo->getVersion().epoch() != coll.getEpoch()) { + // If the collection's epoch has changed, do a full refresh + startingCollectionVersion = ChunkVersion(0, 0, coll.getEpoch()); + } else { + // Otherwise do a partial refresh + startingCollectionVersion = existingRoutingInfo->getVersion(); + chunkMap = existingRoutingInfo->chunkMap(); + } + + log() << "Refreshing chunks based on version " << startingCollectionVersion; + + // Diff tracker should *always* find at least one chunk if collection exists + const auto diffQuery = + CMConfigDiffTracker::createConfigDiffQuery(nss, startingCollectionVersion); + + // Query the chunks which have changed + std::vector<ChunkType> newChunks; + repl::OpTime opTime; + uassertStatusOK(Grid::get(opCtx)->catalogClient(opCtx)->getChunks( + opCtx, + diffQuery.query, + diffQuery.sort, + boost::none, + &newChunks, + &opTime, + repl::ReadConcernLevel::kMajorityReadConcern)); + + ChunkVersion collectionVersion = startingCollectionVersion; + + ShardVersionMap unusedShardVersions; + CMConfigDiffTracker differ(nss, &chunkMap, &collectionVersion, &unusedShardVersions); + + const int diffsApplied = differ.calculateConfigDiff(opCtx, newChunks); + + if (diffsApplied < 1) { + log() << "Refresh took " << t.millis() << " ms and failed because the collection's " + "sharding metadata either changed in between or " + "became corrupted"; + + uasserted(ErrorCodes::ConflictingOperationInProgress, + "Collection sharding status changed during refresh or became corrupted"); + } + + // If at least 
one diff was applied, the metadata is correct, but it might not have changed so + // in this case there is no need to recreate the chunk manager. + // + // NOTE: In addition to the above statement, it is also important that we return the same chunk + // manager object, because the write commands' code relies on changes of the chunk manager's + // sequence number to detect batch writes not making progress because of chunks moving across + // shards too frequently. + if (collectionVersion == startingCollectionVersion) { + log() << "Refresh took " << t.millis() << " ms and didn't find any metadata changes"; + + return existingRoutingInfo; + } + + std::unique_ptr<CollatorInterface> defaultCollator; + if (!coll.getDefaultCollation().isEmpty()) { + // The collation should have been validated upon collection creation + defaultCollator = uassertStatusOK(CollatorFactoryInterface::get(opCtx->getServiceContext()) + ->makeFromBSON(coll.getDefaultCollation())); + } + + log() << "Refresh took " << t.millis() << " ms and found version " << collectionVersion; + + return stdx::make_unique<ChunkManager>(nss, + coll.getKeyPattern(), + std::move(defaultCollator), + coll.getUnique(), + std::move(chunkMap), + collectionVersion); +} + +std::shared_ptr<CatalogCache::DatabaseInfoEntry> CatalogCache::_getDatabase_inlock( + OperationContext* opCtx, StringData dbName) { + auto it = _databases.find(dbName); + if (it != _databases.end()) { + return it->second; + } + + const auto catalogClient = Grid::get(opCtx)->catalogClient(opCtx); + + const auto dbNameCopy = dbName.toString(); + + // Load the database entry + const auto opTimeWithDb = uassertStatusOK(catalogClient->getDatabase(opCtx, dbNameCopy)); + const auto& dbDesc = opTimeWithDb.value; + + // Load the sharded collections entries + std::vector<CollectionType> collections; + repl::OpTime collLoadConfigOptime; + uassertStatusOK( + catalogClient->getCollections(opCtx, &dbNameCopy, &collections, &collLoadConfigOptime)); + + 
StringMap<CollectionRoutingInfoEntry> collectionEntries; + for (const auto& coll : collections) { + collectionEntries[coll.getNs().ns()].needsRefresh = true; + } + + return _databases[dbName] = std::shared_ptr<DatabaseInfoEntry>(new DatabaseInfoEntry{ + dbDesc.getPrimary(), dbDesc.getSharded(), std::move(collectionEntries)}); +} + +CachedDatabaseInfo::CachedDatabaseInfo(std::shared_ptr<CatalogCache::DatabaseInfoEntry> db) + : _db(std::move(db)) {} + +const ShardId& CachedDatabaseInfo::primaryId() const { + return _db->primaryShardId; +} + +bool CachedDatabaseInfo::shardingEnabled() const { + return _db->shardingEnabled; +} + +CachedCollectionRoutingInfo::CachedCollectionRoutingInfo(ShardId primaryId, + std::shared_ptr<ChunkManager> cm) + : _primaryId(std::move(primaryId)), _cm(std::move(cm)) {} + +CachedCollectionRoutingInfo::CachedCollectionRoutingInfo(ShardId primaryId, + NamespaceString nss, + std::shared_ptr<Shard> primary) + : _primaryId(std::move(primaryId)), _nss(std::move(nss)), _primary(std::move(primary)) {} + } // namespace mongo diff --git a/src/mongo/s/catalog_cache.h b/src/mongo/s/catalog_cache.h index 8d30c1aebf0..528b2df4673 100644 --- a/src/mongo/s/catalog_cache.h +++ b/src/mongo/s/catalog_cache.h @@ -28,19 +28,20 @@ #pragma once -#include <memory> - #include "mongo/base/disallow_copying.h" #include "mongo/base/string_data.h" +#include "mongo/s/chunk_manager.h" +#include "mongo/s/chunk_version.h" +#include "mongo/s/client/shard.h" #include "mongo/stdx/mutex.h" +#include "mongo/util/concurrency/notification.h" #include "mongo/util/string_map.h" namespace mongo { -class DBConfig; +class CachedDatabaseInfo; +class CachedCollectionRoutingInfo; class OperationContext; -template <typename T> -class StatusWith; /** * This is the root of the "read-only" hierarchy of cached catalog metadata. It is read only @@ -62,26 +63,184 @@ public: * * Returns the database cache entry if the database exists or a failed status otherwise. 
*/ - StatusWith<std::shared_ptr<DBConfig>> getDatabase(OperationContext* txn, StringData dbName); + StatusWith<CachedDatabaseInfo> getDatabase(OperationContext* opCtx, StringData dbName); + + /** + * Blocking shortcut method to get a specific sharded collection from a given database using the + * complete namespace. If the collection is sharded returns a ScopedChunkManager initialized + * with ChunkManager. If the collection is not sharded, returns a ScopedChunkManager initialized + * with the primary shard for the specified database. If an error occurs loading the metadata + * returns a failed status. + */ + StatusWith<CachedCollectionRoutingInfo> getCollectionRoutingInfo(OperationContext* opCtx, + const NamespaceString& nss); + StatusWith<CachedCollectionRoutingInfo> getCollectionRoutingInfo(OperationContext* opCtx, + StringData ns); + + /** + * Same as getCollectionRoutingInfo above, but in addition causes the namespace to be refreshed + * and returns a NamespaceNotSharded error if the collection is not sharded. + */ + StatusWith<CachedCollectionRoutingInfo> getShardedCollectionRoutingInfoWithRefresh( + OperationContext* opCtx, const NamespaceString& nss); + StatusWith<CachedCollectionRoutingInfo> getShardedCollectionRoutingInfoWithRefresh( + OperationContext* opCtx, StringData ns); + + /** + * Non-blocking method to be called whenever using the specified routing table has encountered a + * stale config exception. Returns immediately and causes the routing table to be refreshed the + * next time getCollectionRoutingInfo is called. Does nothing if the routing table has been + * refreshed already. + */ + void onStaleConfigError(CachedCollectionRoutingInfo&&); + + /** + * Non-blocking method, which indiscriminately causes the routing table for the specified + * namespace to be refreshed the next time getCollectionRoutingInfo is called. 
+ */ + void invalidateShardedCollection(const NamespaceString& nss); + void invalidateShardedCollection(StringData ns); + + /** + * Blocking method, which removes the entire specified database (including its collections) from + * the cache. + */ + void purgeDatabase(StringData dbName); /** - * Removes the database information for the specified name from the cache, so that the - * next time getDatabase is called, it will be reloaded. + * Blocking method, which removes all databases (including their collections) from the cache. */ - void invalidate(StringData dbName); + void purgeAllDatabases(); /** - * Purges all cached database information, which will cause the data to be reloaded again. + * Blocking method, which refreshes the routing information for the specified collection. If + * 'existingRoutingInfo' has been specified uses this as a basis to perform an 'incremental' + * refresh, which only fetches the chunks which changed. Otherwise does a full refresh, fetching + * all the chunks for the collection. + * + * Returns the refreshed routing information if the collection is still sharded or nullptr if it + * is not. If refresh fails for any reason, throws a DBException. + * + * With the exception of ConflictingOperationInProgress, error codes thrown from this method are + * final in that there is nothing that can be done to remedy them other than pass the error to + * the user. + * + * ConflictingOperationInProgress indicates that the chunk metadata was found to be + * inconsistent. Since this may be transient, due to the collection being dropped or recreated, + * the caller must retry the reload up to some configurable number of attempts. + * + * NOTE: Should never be called directly and is exposed as public for testing purposes only. 
*/ - void invalidateAll(); + static std::shared_ptr<ChunkManager> refreshCollectionRoutingInfo( + OperationContext* opCtx, + const NamespaceString& nss, + std::shared_ptr<ChunkManager> existingRoutingInfo); private: - using ShardedDatabasesMap = StringMap<std::shared_ptr<DBConfig>>; + // Make the cache entries friends so they can access the private classes below + friend class CachedDatabaseInfo; + friend class CachedCollectionRoutingInfo; + + /** + * Cache entry describing a collection. + */ + struct CollectionRoutingInfoEntry { + std::shared_ptr<ChunkManager> routingInfo; + + bool needsRefresh{true}; + }; + + /** + * Cache entry describing a database. + */ + struct DatabaseInfoEntry { + ShardId primaryShardId; + + bool shardingEnabled; + + StringMap<CollectionRoutingInfoEntry> collections; + }; + + using DatabaseInfoMap = StringMap<std::shared_ptr<DatabaseInfoEntry>>; + + /** + * Ensures that the specified database is in the cache, loading it if necessary. If the database + * was not in cache, all the sharded collections will be in the 'needsRefresh' state. + */ + std::shared_ptr<DatabaseInfoEntry> _getDatabase_inlock(OperationContext* opCtx, + StringData dbName); // Mutex to serialize access to the structures below stdx::mutex _mutex; - ShardedDatabasesMap _databases; + // Map from DB name to the info for that database + DatabaseInfoMap _databases; +}; + +/** + * Constructed exclusively by the CatalogCache, contains a reference to the cached information for + * the specified database. + */ +class CachedDatabaseInfo { +public: + const ShardId& primaryId() const; + + bool shardingEnabled() const; + +private: + friend class CatalogCache; + + CachedDatabaseInfo(std::shared_ptr<CatalogCache::DatabaseInfoEntry> db); + + std::shared_ptr<CatalogCache::DatabaseInfoEntry> _db; +}; + +/** + * Constructed exclusively by the CatalogCache contains a reference to the routing information for + * the specified collection. 
+ */ +class CachedCollectionRoutingInfo { +public: + /** + * Returns the ID of the primary shard for the database owning this collection, regardless of + * whether it is sharded or not. + */ + const ShardId& primaryId() const { + return _primaryId; + } + + /** + * If the collection is sharded, returns a chunk manager for it. Otherwise, nullptr. + */ + std::shared_ptr<ChunkManager> cm() const { + return _cm; + } + + /** + * If the collection is not sharded, returns its primary shard. Otherwise, nullptr. + */ + std::shared_ptr<Shard> primary() const { + return _primary; + } + +private: + friend class CatalogCache; + + CachedCollectionRoutingInfo(ShardId primaryId, std::shared_ptr<ChunkManager> cm); + + CachedCollectionRoutingInfo(ShardId primaryId, + NamespaceString nss, + std::shared_ptr<Shard> primary); + + // The id of the primary shard containing the database + ShardId _primaryId; + + // Reference to the corresponding chunk manager (if sharded) or null + std::shared_ptr<ChunkManager> _cm; + + // Reference to the primary of the database (if not sharded) or null + NamespaceString _nss; + std::shared_ptr<Shard> _primary; }; } // namespace mongo diff --git a/src/mongo/s/chunk_diff.cpp b/src/mongo/s/chunk_diff.cpp index 90ec41b8923..5e9cd73bf84 100644 --- a/src/mongo/s/chunk_diff.cpp +++ b/src/mongo/s/chunk_diff.cpp @@ -105,7 +105,7 @@ int ConfigDiffTracker<ValType>::calculateConfigDiff(OperationContext* txn, // Store epoch now so it doesn't change when we change max OID currEpoch = _maxVersion->epoch(); - _validDiffs = 0; + int validDiffs = 0; for (const ChunkType& chunk : chunks) { const ChunkVersion& chunkVersion = chunk.getVersion(); @@ -121,7 +121,7 @@ int ConfigDiffTracker<ValType>::calculateConfigDiff(OperationContext* txn, return -1; } - _validDiffs++; + validDiffs++; // Get max changed version and chunk version if (chunkVersion > *_maxVersion) { @@ -151,7 +151,7 @@ int ConfigDiffTracker<ValType>::calculateConfigDiff(OperationContext* txn, } } - LOG(3) << 
"found " << _validDiffs << " new chunks for collection " << _ns << " (tracking " + LOG(3) << "found " << validDiffs << " new chunks for collection " << _ns << " (tracking " << newTracked.size() << "), new version is " << *_maxVersion; for (const ChunkType& chunk : newTracked) { @@ -167,7 +167,7 @@ int ConfigDiffTracker<ValType>::calculateConfigDiff(OperationContext* txn, _currMap->insert(rangeFor(txn, chunk)); } - return _validDiffs; + return validDiffs; } ConfigDiffTrackerBase::QueryAndSort ConfigDiffTrackerBase::createConfigDiffQuery( diff --git a/src/mongo/s/chunk_diff.h b/src/mongo/s/chunk_diff.h index 9ea6ed5b62e..9b4aaec150c 100644 --- a/src/mongo/s/chunk_diff.h +++ b/src/mongo/s/chunk_diff.h @@ -93,12 +93,8 @@ public: RangeMap* currMap, ChunkVersion* maxVersion, MaxChunkVersionMap* maxShardVersions); - virtual ~ConfigDiffTracker(); - // Call after load for more information - int numValidDiffs() const { - return _validDiffs; - } + virtual ~ConfigDiffTracker(); // Applies changes to the config data from a vector of chunks passed in. Also includes minor // version changes for particular major-version chunks if explicitly specified. 
@@ -135,9 +131,6 @@ private: RangeMap* const _currMap; ChunkVersion* const _maxVersion; MaxChunkVersionMap* const _maxShardVersions; - - // Store for later use - int _validDiffs{0}; }; } // namespace mongo diff --git a/src/mongo/s/chunk_manager.cpp b/src/mongo/s/chunk_manager.cpp index 75f2cc7c11c..de37ac85147 100644 --- a/src/mongo/s/chunk_manager.cpp +++ b/src/mongo/s/chunk_manager.cpp @@ -32,295 +32,49 @@ #include "mongo/s/chunk_manager.h" -#include <boost/next_prior.hpp> #include <vector> #include "mongo/bson/simple_bsonobj_comparator.h" -#include "mongo/client/read_preference.h" #include "mongo/db/matcher/extensions_callback_noop.h" #include "mongo/db/query/collation/collation_index_key.h" #include "mongo/db/query/index_bounds_builder.h" #include "mongo/db/query/query_planner.h" #include "mongo/db/query/query_planner_common.h" -#include "mongo/s/catalog/sharding_catalog_client.h" -#include "mongo/s/chunk_diff.h" -#include "mongo/s/client/shard_registry.h" -#include "mongo/s/grid.h" #include "mongo/util/log.h" -#include "mongo/util/timer.h" namespace mongo { - -using std::map; -using std::pair; -using std::set; -using std::shared_ptr; -using std::string; -using std::unique_ptr; - namespace { // Used to generate sequence numbers to assign to each newly created ChunkManager AtomicUInt32 nextCMSequenceNumber(0); -/** - * This is an adapter so we can use config diffs - mongos and mongod do them slightly differently. - * - * The mongos adapter here tracks all shards, and stores ranges by (max, Chunk) in the map. 
- */ -class CMConfigDiffTracker : public ConfigDiffTracker<shared_ptr<Chunk>> { -public: - CMConfigDiffTracker(const std::string& ns, - RangeMap* currMap, - ChunkVersion* maxVersion, - MaxChunkVersionMap* maxShardVersions, - ChunkManager* manager) - : ConfigDiffTracker<shared_ptr<Chunk>>(ns, currMap, maxVersion, maxShardVersions), - _manager(manager) {} - - bool isTracked(const ChunkType& chunk) const final { - // Mongos tracks all shards - return true; - } - - bool isMinKeyIndexed() const final { - return false; - } - - pair<BSONObj, shared_ptr<Chunk>> rangeFor(OperationContext* txn, - const ChunkType& chunk) const final { - return std::make_pair(chunk.getMax(), std::make_shared<Chunk>(chunk)); - } - - ShardId shardFor(OperationContext* txn, const ShardId& shardId) const final { - const auto shard = uassertStatusOK(Grid::get(txn)->shardRegistry()->getShard(txn, shardId)); - return shard->getId(); - } - -private: - ChunkManager* const _manager; -}; - -bool allOfType(BSONType type, const BSONObj& o) { - BSONObjIterator it(o); - while (it.more()) { - if (it.next().type() != type) { - return false; - } - } - return true; -} - -bool isChunkMapValid(const ChunkMap& chunkMap) { -#define ENSURE(x) \ - do { \ - if (!(x)) { \ - log() << "ChunkManager::_isValid failed: " #x; \ - return false; \ - } \ - } while (0) - - if (chunkMap.empty()) { - return true; - } - - // Check endpoints - ENSURE(allOfType(MinKey, chunkMap.begin()->second->getMin())); - ENSURE(allOfType(MaxKey, boost::prior(chunkMap.end())->second->getMax())); - - // Make sure there are no gaps or overlaps - for (ChunkMap::const_iterator it = boost::next(chunkMap.begin()), end = chunkMap.end(); - it != end; - ++it) { - ChunkMap::const_iterator last = boost::prior(it); - - if (SimpleBSONObjComparator::kInstance.evaluate(it->second->getMin() != - last->second->getMax())) { - log() << last->second->toString(); - log() << it->second->toString(); - log() << it->second->getMin(); - log() << last->second->getMax(); - } 
- - ENSURE(SimpleBSONObjComparator::kInstance.evaluate(it->second->getMin() == - last->second->getMax())); +void checkAllElementsAreOfType(BSONType type, const BSONObj& o) { + for (const auto&& element : o) { + uassert(ErrorCodes::ConflictingOperationInProgress, + str::stream() << "Not all elements of " << o << " are of type " << typeName(type), + element.type() == type); } - - return true; - -#undef ENSURE } } // namespace ChunkManager::ChunkManager(NamespaceString nss, - const OID& epoch, - const ShardKeyPattern& shardKeyPattern, + KeyPattern shardKeyPattern, std::unique_ptr<CollatorInterface> defaultCollator, - bool unique) + bool unique, + ChunkMap chunkMap, + ChunkVersion collectionVersion) : _sequenceNumber(nextCMSequenceNumber.addAndFetch(1)), _nss(std::move(nss)), - _keyPattern(shardKeyPattern.getKeyPattern()), + _shardKeyPattern(shardKeyPattern), _defaultCollator(std::move(defaultCollator)), _unique(unique), - _chunkMap(SimpleBSONObjComparator::kInstance.makeBSONObjIndexedMap<std::shared_ptr<Chunk>>()), - _chunkRangeMap( - SimpleBSONObjComparator::kInstance.makeBSONObjIndexedMap<ShardAndChunkRange>()), - _version(0, 0, epoch) {} + _chunkMap(std::move(chunkMap)), + _chunkMapViews(_constructChunkMapViews(collectionVersion.epoch(), _chunkMap)), + _collectionVersion(collectionVersion) {} ChunkManager::~ChunkManager() = default; -void ChunkManager::loadExistingRanges(OperationContext* txn, const ChunkManager* oldManager) { - invariant(!_version.isSet()); - - int tries = 3; - - while (tries--) { - ChunkMap chunkMap = - SimpleBSONObjComparator::kInstance.makeBSONObjIndexedMap<std::shared_ptr<Chunk>>(); - set<ShardId> shardIds; - ShardVersionMap shardVersions; - - Timer t; - - log() << "ChunkManager loading chunks for " << _nss - << " sequenceNumber: " << _sequenceNumber - << " based on: " << (oldManager ? 
oldManager->getVersion().toString() : "(empty)"); - - if (_load(txn, chunkMap, shardIds, &shardVersions, oldManager)) { - // TODO: Merge into diff code above, so we validate in one place - if (isChunkMapValid(chunkMap)) { - _chunkMap = std::move(chunkMap); - _shardVersions = std::move(shardVersions); - _chunkRangeMap = _constructRanges(_chunkMap); - - log() << "ChunkManager load took " << t.millis() << " ms and found version " - << _version; - - return; - } - } - - warning() << "ChunkManager load failed after " << t.millis() - << " ms and will be retried up to " << tries << " more times"; - - sleepmillis(10 * (3 - tries)); - } - - // This will abort construction so we should never have a reference to an invalid config - msgasserted(13282, - str::stream() << "Couldn't load a valid config for " << _nss.ns() - << " after 3 attempts. Please try again."); -} - -bool ChunkManager::_load(OperationContext* txn, - ChunkMap& chunkMap, - set<ShardId>& shardIds, - ShardVersionMap* shardVersions, - const ChunkManager* oldManager) { - // Reset the max version, but not the epoch, when we aren't loading from the oldManager - _version = ChunkVersion(0, 0, _version.epoch()); - - // If we have a previous version of the ChunkManager to work from, use that info to reduce - // our config query - if (oldManager && oldManager->getVersion().isSet()) { - // Get the old max version - _version = oldManager->getVersion(); - - // Load a copy of the old versions - *shardVersions = oldManager->_shardVersions; - - // Load a copy of the chunk map, replacing the chunk manager with our own - const ChunkMap& oldChunkMap = oldManager->getChunkMap(); - - for (const auto& oldChunkMapEntry : oldChunkMap) { - const auto& oldC = oldChunkMapEntry.second; - chunkMap.emplace(oldC->getMax(), std::make_shared<Chunk>(*oldC)); - } - - LOG(2) << "loading chunk manager for collection " << _nss - << " using old chunk manager w/ version " << _version.toString() << " and " - << oldChunkMap.size() << " chunks"; - } - - 
// Get the diff query required - const auto diffQuery = CMConfigDiffTracker::createConfigDiffQuery(_nss, _version); - - // Attach a diff tracker for the versioned chunk data - CMConfigDiffTracker differ(_nss.ns(), &chunkMap, &_version, shardVersions, this); - - // Diff tracker should *always* find at least one chunk if collection exists - repl::OpTime opTime; - std::vector<ChunkType> chunks; - uassertStatusOK(Grid::get(txn)->catalogClient(txn)->getChunks( - txn, - diffQuery.query, - diffQuery.sort, - boost::none, - &chunks, - &opTime, - repl::ReadConcernLevel::kMajorityReadConcern)); - - invariant(opTime >= _configOpTime); - _configOpTime = opTime; - - int diffsApplied = differ.calculateConfigDiff(txn, chunks); - if (diffsApplied > 0) { - LOG(2) << "loaded " << diffsApplied << " chunks into new chunk manager for " << _nss - << " with version " << _version; - - // Add all existing shards we find to the shards set - for (ShardVersionMap::iterator it = shardVersions->begin(); it != shardVersions->end();) { - auto shardStatus = Grid::get(txn)->shardRegistry()->getShard(txn, it->first); - if (shardStatus.isOK()) { - shardIds.insert(it->first); - ++it; - } else { - invariant(shardStatus == ErrorCodes::ShardNotFound); - shardVersions->erase(it++); - } - } - - _configOpTime = opTime; - - return true; - } else if (diffsApplied == 0) { - // No chunks were found for the ns - warning() << "no chunks found when reloading " << _nss << ", previous version was " - << _version; - - // Set all our data to empty - chunkMap.clear(); - shardVersions->clear(); - - _version = ChunkVersion(0, 0, OID()); - _configOpTime = opTime; - - return true; - } else { // diffsApplied < 0 - - bool allInconsistent = (differ.numValidDiffs() == 0); - if (allInconsistent) { - // All versions are different, this can be normal - warning() << "major change in chunk information found when reloading " << _nss - << ", previous version was " << _version; - } else { - // Inconsistent load halfway through (due to 
yielding cursor during load) - // should be rare - warning() << "inconsistent chunks found when reloading " << _nss - << ", previous version was " << _version << ", this should be rare"; - } - - // Set all our data to empty to be extra safe - chunkMap.clear(); - shardVersions->clear(); - - _version = ChunkVersion(0, 0, OID()); - - return allInconsistent; - } -} - std::shared_ptr<Chunk> ChunkManager::findIntersectingChunk(const BSONObj& shardKey, const BSONObj& collation) const { const bool hasSimpleCollation = (collation.isEmpty() && !_defaultCollator) || @@ -350,7 +104,7 @@ std::shared_ptr<Chunk> ChunkManager::findIntersectingChunkWithSimpleCollation( void ChunkManager::getShardIdsForQuery(OperationContext* txn, const BSONObj& query, const BSONObj& collation, - set<ShardId>* shardIds) const { + std::set<ShardId>* shardIds) const { auto qr = stdx::make_unique<QueryRequest>(_nss); qr->setFilter(query); @@ -369,7 +123,7 @@ void ChunkManager::getShardIdsForQuery(OperationContext* txn, } // Fast path for targeting equalities on the shard key. 
- auto shardKeyToFind = _keyPattern.extractShardKeyFromQuery(*cq); + auto shardKeyToFind = _shardKeyPattern.extractShardKeyFromQuery(*cq); if (!shardKeyToFind.isEmpty()) { try { auto chunk = findIntersectingChunk(shardKeyToFind, collation); @@ -386,20 +140,20 @@ void ChunkManager::getShardIdsForQuery(OperationContext* txn, // Query { a : { $gte : 1, $lt : 2 }, // b : { $gte : 3, $lt : 4 } } // => Bounds { a : [1, 2), b : [3, 4) } - IndexBounds bounds = getIndexBoundsForQuery(_keyPattern.toBSON(), *cq); + IndexBounds bounds = getIndexBoundsForQuery(_shardKeyPattern.toBSON(), *cq); // Transforms bounds for each shard key field into full shard key ranges // for example : // Key { a : 1, b : 1 } // Bounds { a : [1, 2), b : [3, 4) } // => Ranges { a : 1, b : 3 } => { a : 2, b : 4 } - BoundList ranges = _keyPattern.flattenBounds(bounds); + BoundList ranges = _shardKeyPattern.flattenBounds(bounds); for (BoundList::const_iterator it = ranges.begin(); it != ranges.end(); ++it) { getShardIdsForRange(it->first /*min*/, it->second /*max*/, shardIds); // once we know we need to visit all shards no need to keep looping - if (shardIds->size() == _shardVersions.size()) { + if (shardIds->size() == _chunkMapViews.shardVersions.size()) { break; } } @@ -408,38 +162,38 @@ void ChunkManager::getShardIdsForQuery(OperationContext* txn, // For now, we satisfy that assumption by adding a shard with no matches rather than returning // an empty set of shards. 
if (shardIds->empty()) { - shardIds->insert(_chunkRangeMap.begin()->second.getShardId()); + shardIds->insert(_chunkMapViews.chunkRangeMap.begin()->second.shardId); } } void ChunkManager::getShardIdsForRange(const BSONObj& min, const BSONObj& max, std::set<ShardId>* shardIds) const { - auto it = _chunkRangeMap.upper_bound(min); - auto end = _chunkRangeMap.upper_bound(max); + auto it = _chunkMapViews.chunkRangeMap.upper_bound(min); + auto end = _chunkMapViews.chunkRangeMap.upper_bound(max); // The chunk range map must always cover the entire key space - invariant(it != _chunkRangeMap.end()); + invariant(it != _chunkMapViews.chunkRangeMap.end()); // We need to include the last chunk - if (end != _chunkRangeMap.cend()) { + if (end != _chunkMapViews.chunkRangeMap.cend()) { ++end; } for (; it != end; ++it) { - shardIds->insert(it->second.getShardId()); + shardIds->insert(it->second.shardId); // No need to iterate through the rest of the ranges, because we already know we need to use // all shards. 
- if (shardIds->size() == _shardVersions.size()) { + if (shardIds->size() == _chunkMapViews.shardVersions.size()) { break; } } } -void ChunkManager::getAllShardIds(set<ShardId>* all) const { - std::transform(_shardVersions.begin(), - _shardVersions.end(), +void ChunkManager::getAllShardIds(std::set<ShardId>* all) const { + std::transform(_chunkMapViews.shardVersions.begin(), + _chunkMapViews.shardVersions.end(), std::inserter(*all, all->begin()), [](const ShardVersionMap::value_type& pair) { return pair.first; }); } @@ -456,7 +210,7 @@ IndexBounds ChunkManager::getIndexBoundsForQuery(const BSONObj& key, } // Consider shard key as an index - string accessMethod = IndexNames::findPluginName(key); + std::string accessMethod = IndexNames::findPluginName(key); dassert(accessMethod == IndexNames::BTREE || accessMethod == IndexNames::HASHED); // Use query framework to generate index bounds @@ -563,19 +317,19 @@ bool ChunkManager::compatibleWith(const ChunkManager& other, const ShardId& shar } ChunkVersion ChunkManager::getVersion(const ShardId& shardName) const { - auto it = _shardVersions.find(shardName); - if (it == _shardVersions.end()) { + auto it = _chunkMapViews.shardVersions.find(shardName); + if (it == _chunkMapViews.shardVersions.end()) { // Shards without explicitly tracked shard versions (meaning they have no chunks) always // have a version of (0, 0, epoch) - return ChunkVersion(0, 0, _version.epoch()); + return ChunkVersion(0, 0, _collectionVersion.epoch()); } return it->second; } -string ChunkManager::toString() const { +std::string ChunkManager::toString() const { StringBuilder sb; - sb << "ChunkManager: " << _nss.ns() << " key:" << _keyPattern.toString() << '\n'; + sb << "ChunkManager: " << _nss.ns() << " key:" << _shardKeyPattern.toString() << '\n'; for (const auto& entry : _chunkMap) { sb << "\t" << entry.second->toString() << '\n'; @@ -584,47 +338,82 @@ string ChunkManager::toString() const { return sb.str(); } -ChunkManager::ChunkRangeMap 
ChunkManager::_constructRanges(const ChunkMap& chunkMap) { +ChunkManager::ChunkMapViews ChunkManager::_constructChunkMapViews(const OID& epoch, + const ChunkMap& chunkMap) { + invariant(!chunkMap.empty()); + ChunkRangeMap chunkRangeMap = SimpleBSONObjComparator::kInstance.makeBSONObjIndexedMap<ShardAndChunkRange>(); - if (chunkMap.empty()) { - return chunkRangeMap; - } + ShardVersionMap shardVersions; ChunkMap::const_iterator current = chunkMap.cbegin(); while (current != chunkMap.cend()) { - const auto rangeFirst = current; + const auto& firstChunkInRange = current->second; + + // Tracks the max shard version for the shard on which the current range will reside + auto shardVersionIt = shardVersions.find(firstChunkInRange->getShardId()); + if (shardVersionIt == shardVersions.end()) { + shardVersionIt = + shardVersions.emplace(firstChunkInRange->getShardId(), ChunkVersion(0, 0, epoch)) + .first; + } + + auto& maxShardVersion = shardVersionIt->second; + current = std::find_if( - current, chunkMap.cend(), [&rangeFirst](const ChunkMap::value_type& chunkMapEntry) { - return chunkMapEntry.second->getShardId() != rangeFirst->second->getShardId(); + current, + chunkMap.cend(), + [&firstChunkInRange, &maxShardVersion](const ChunkMap::value_type& chunkMapEntry) { + const auto& currentChunk = chunkMapEntry.second; + + if (currentChunk->getShardId() != firstChunkInRange->getShardId()) + return true; + + if (currentChunk->getLastmod() > maxShardVersion) + maxShardVersion = currentChunk->getLastmod(); + + return false; }); + const auto rangeLast = std::prev(current); - const BSONObj rangeMin = rangeFirst->second->getMin(); + const BSONObj rangeMin = firstChunkInRange->getMin(); const BSONObj rangeMax = rangeLast->second->getMax(); - auto insertResult = chunkRangeMap.insert(std::make_pair( - rangeMax, ShardAndChunkRange(rangeMin, rangeMax, rangeFirst->second->getShardId()))); - invariant(insertResult.second); - if (insertResult.first != chunkRangeMap.begin()) { + const auto 
insertResult = chunkRangeMap.insert(std::make_pair( + rangeMax, ShardAndChunkRange{{rangeMin, rangeMax}, firstChunkInRange->getShardId()})); + uassert(ErrorCodes::ConflictingOperationInProgress, + str::stream() << "Metadata contains two chunks with the same max value " + << rangeMax, + insertResult.second); + + const auto& insertIterator = insertResult.first; + + if (insertIterator != chunkRangeMap.begin()) { // Make sure there are no gaps in the ranges - insertResult.first--; - invariant( - SimpleBSONObjComparator::kInstance.evaluate(insertResult.first->first == rangeMin)); + uassert(ErrorCodes::ConflictingOperationInProgress, + str::stream() << "Gap or an overlap between ranges " + << insertIterator->second.range.toString() + << " and " + << std::prev(insertIterator)->second.range.toString(), + SimpleBSONObjComparator::kInstance.evaluate(std::prev(insertIterator)->first == + rangeMin)); } + + // If a shard has chunks it must have a shard version, otherwise we have an invalid chunk + // somewhere, which should have been caught at chunk load time + invariant(maxShardVersion.isSet()); } invariant(!chunkRangeMap.empty()); - invariant(allOfType(MinKey, chunkRangeMap.begin()->second.getMin())); - invariant(allOfType(MaxKey, chunkRangeMap.rbegin()->first)); + invariant(!shardVersions.empty()); - return chunkRangeMap; -} + checkAllElementsAreOfType(MinKey, chunkRangeMap.begin()->second.min()); + checkAllElementsAreOfType(MaxKey, chunkRangeMap.rbegin()->first); -repl::OpTime ChunkManager::getConfigOpTime() const { - return _configOpTime; + return {std::move(chunkRangeMap), std::move(shardVersions)}; } } // namespace mongo diff --git a/src/mongo/s/chunk_manager.h b/src/mongo/s/chunk_manager.h index 08d1d9fe229..f0ae4e5e758 100644 --- a/src/mongo/s/chunk_manager.h +++ b/src/mongo/s/chunk_manager.h @@ -35,8 +35,6 @@ #include "mongo/base/disallow_copying.h" #include "mongo/db/namespace_string.h" #include "mongo/db/query/collation/collator_interface.h" -#include 
"mongo/db/repl/optime.h" -#include "mongo/s/catalog/type_chunk.h" #include "mongo/s/chunk.h" #include "mongo/s/chunk_version.h" #include "mongo/s/client/shard.h" @@ -60,10 +58,11 @@ class ChunkManager { public: ChunkManager(NamespaceString nss, - const OID& epoch, - const ShardKeyPattern& shardKeyPattern, + KeyPattern shardKeyPattern, std::unique_ptr<CollatorInterface> defaultCollator, - bool unique); + bool unique, + ChunkMap chunkMap, + ChunkVersion collectionVersion); ~ChunkManager(); @@ -79,7 +78,7 @@ public: } const ShardKeyPattern& getShardKeyPattern() const { - return _keyPattern; + return _shardKeyPattern; } const CollatorInterface* getDefaultCollator() const { @@ -91,10 +90,12 @@ public: } ChunkVersion getVersion() const { - return _version; + return _collectionVersion; } - const ChunkMap& getChunkMap() const { + ChunkVersion getVersion(const ShardId& shardId) const; + + const ChunkMap& chunkMap() const { return _chunkMap; } @@ -102,12 +103,9 @@ public: return _chunkMap.size(); } - // Loads existing ranges based on info in chunk manager - void loadExistingRanges(OperationContext* txn, const ChunkManager* oldManager); - - // - // Methods to use once loaded / created - // + const ShardVersionMap& shardVersions() const { + return _chunkMapViews.shardVersions; + } /** * Given a shard key (or a prefix) that has been extracted from a document, returns the chunk @@ -177,57 +175,46 @@ public: std::string toString() const; - ChunkVersion getVersion(const ShardId& shardName) const; - - /** - * Returns the opTime of config server the last time chunks were loaded. - */ - repl::OpTime getConfigOpTime() const; - private: + friend class CollectionRoutingDataLoader; + /** * Represents a range of chunk keys [getMin(), getMax()) and the id of the shard on which they * reside according to the metadata. 
*/ - class ShardAndChunkRange { - public: - ShardAndChunkRange(const BSONObj& min, const BSONObj& max, ShardId inShardId) - : _range(min, max), _shardId(std::move(inShardId)) {} - - const BSONObj& getMin() const { - return _range.getMin(); - } - - const BSONObj& getMax() const { - return _range.getMax(); + struct ShardAndChunkRange { + const BSONObj& min() const { + return range.getMin(); } - const ShardId& getShardId() const { - return _shardId; + const BSONObj& max() const { + return range.getMax(); } - private: - ChunkRange _range; - ShardId _shardId; + ChunkRange range; + ShardId shardId; }; using ChunkRangeMap = BSONObjIndexedMap<ShardAndChunkRange>; /** - * If load was successful, returns true and it is guaranteed that the _chunkMap and - * _chunkRangeMap are consistent with each other. If false is returned, it is not safe to use - * the chunk manager anymore. + * Contains different transformations of the chunk map for efficient querying */ - bool _load(OperationContext* txn, - ChunkMap& chunks, - std::set<ShardId>& shardIds, - ShardVersionMap* shardVersions, - const ChunkManager* oldManager); + struct ChunkMapViews { + // Transformation of the chunk map containing what range of keys reside on which shard. The + // index is the max key of the respective range and the union of all ranges in a such + // constructed map must cover the complete space from [MinKey, MaxKey). + const ChunkRangeMap chunkRangeMap; + + // Map from shard id to the maximum chunk version for that shard. If a shard contains no + // chunks, it won't be present in this map. + const ShardVersionMap shardVersions; + }; /** - * Merges consecutive chunks, which reside on the same shard into a single range. + * Does a single pass over the chunkMap and constructs the ChunkMapViews object. 
*/ - static ChunkRangeMap _constructRanges(const ChunkMap& chunkMap); + static ChunkMapViews _constructChunkMapViews(const OID& epoch, const ChunkMap& chunkMap); // The shard versioning mechanism hinges on keeping track of the number of times we reload // ChunkManagers. @@ -237,7 +224,7 @@ private: const NamespaceString _nss; // The key pattern used to shard the collection - const ShardKeyPattern _keyPattern; + const ShardKeyPattern _shardKeyPattern; // Default collation to use for routing data queries for this collection const std::unique_ptr<CollatorInterface> _defaultCollator; @@ -247,23 +234,15 @@ private: // Map from the max for each chunk to an entry describing the chunk. The union of all chunks' // ranges must cover the complete space from [MinKey, MaxKey). - ChunkMap _chunkMap; - - // Transformation of the chunk map containing what range of keys reside on which shard. The - // index is the max key of the respective range and the union of all ranges in a such - // constructed map must cover the complete space from [MinKey, MaxKey). - ChunkRangeMap _chunkRangeMap; + const ChunkMap _chunkMap; - // Max known version per shard - ShardVersionMap _shardVersions; + // Different transformations of the chunk map for efficient querying + const ChunkMapViews _chunkMapViews; // Max version across all chunks - ChunkVersion _version; + const ChunkVersion _collectionVersion; - // OpTime of config server the last time chunks were loaded. 
- repl::OpTime _configOpTime; - - // Auto-split throttling state + // Auto-split throttling state (state mutable by write commands) struct AutoSplitThrottle { public: AutoSplitThrottle() : _splitTickets(maxParallelSplits) {} @@ -280,8 +259,6 @@ private: ChunkManager*, Chunk*, long); - - friend class TestableChunkManager; }; } // namespace mongo diff --git a/src/mongo/s/chunk_manager_test.cpp b/src/mongo/s/chunk_manager_query_test.cpp index b0599fcf690..013c4618d6c 100644 --- a/src/mongo/s/chunk_manager_test.cpp +++ b/src/mongo/s/chunk_manager_query_test.cpp @@ -32,143 +32,13 @@ #include <set> -#include "mongo/client/remote_command_targeter_mock.h" -#include "mongo/db/client.h" #include "mongo/db/query/collation/collator_interface_mock.h" -#include "mongo/s/catalog/sharding_catalog_test_fixture.h" -#include "mongo/s/catalog/type_chunk.h" -#include "mongo/s/catalog/type_collection.h" -#include "mongo/s/catalog/type_shard.h" #include "mongo/s/chunk_manager.h" -#include "mongo/stdx/memory.h" -#include "mongo/util/scopeguard.h" +#include "mongo/s/chunk_manager_test_fixture.h" namespace mongo { namespace { -using executor::RemoteCommandResponse; -using executor::RemoteCommandRequest; - -const NamespaceString kNss("TestDB", "TestColl"); - -class ChunkManagerTestFixture : public ShardingCatalogTestFixture { -protected: - void setUp() override { - ShardingCatalogTestFixture::setUp(); - setRemote(HostAndPort("FakeRemoteClient:34567")); - configTargeter()->setFindHostReturnValue(HostAndPort{CONFIG_HOST_PORT}); - } - - /** - * Returns a chunk manager with chunks at the specified split points. Each individual chunk is - * placed on a separate shard with id ranging from "0" to the number of chunks. 
- */ - std::unique_ptr<ChunkManager> makeChunkManager( - const ShardKeyPattern& shardKeyPattern, - std::unique_ptr<CollatorInterface> defaultCollator, - bool unique, - const std::vector<BSONObj>& splitPoints) { - ChunkVersion version(1, 0, OID::gen()); - - std::vector<BSONObj> shards; - std::vector<BSONObj> initialChunks; - - auto splitPointsIncludingEnds(splitPoints); - splitPointsIncludingEnds.insert(splitPointsIncludingEnds.begin(), - shardKeyPattern.getKeyPattern().globalMin()); - splitPointsIncludingEnds.push_back(shardKeyPattern.getKeyPattern().globalMax()); - - for (size_t i = 1; i < splitPointsIncludingEnds.size(); ++i) { - ShardType shard; - shard.setName(str::stream() << (i - 1)); - shard.setHost(str::stream() << "Host" << (i - 1) << ":12345"); - - shards.push_back(shard.toBSON()); - - ChunkType chunk; - chunk.setNS(kNss.ns()); - chunk.setMin(shardKeyPattern.getKeyPattern().extendRangeBound( - splitPointsIncludingEnds[i - 1], false)); - chunk.setMax(shardKeyPattern.getKeyPattern().extendRangeBound( - splitPointsIncludingEnds[i], false)); - chunk.setShard(shard.getName()); - chunk.setVersion(version); - - initialChunks.push_back(chunk.toBSON()); - - version.incMajor(); - } - - // Load the initial manager - auto manager = stdx::make_unique<ChunkManager>( - kNss, version.epoch(), shardKeyPattern, std::move(defaultCollator), unique); - - auto future = launchAsync([&manager] { - ON_BLOCK_EXIT([&] { Client::destroy(); }); - Client::initThread("Test"); - auto opCtx = cc().makeOperationContext(); - manager->loadExistingRanges(opCtx.get(), nullptr); - }); - - expectFindOnConfigSendBSONObjVector(initialChunks); - expectFindOnConfigSendBSONObjVector(shards); - - future.timed_get(kFutureTimeout); - - return manager; - } -}; - -using ChunkManagerLoadTest = ChunkManagerTestFixture; - -TEST_F(ChunkManagerLoadTest, IncrementalLoadAfterSplit) { - const ShardKeyPattern shardKeyPattern(BSON("_id" << 1)); - - auto initialManager(makeChunkManager(shardKeyPattern, nullptr, 
true, {})); - - ChunkVersion version = initialManager->getVersion(); - - CollectionType collType; - collType.setNs(kNss); - collType.setEpoch(version.epoch()); - collType.setUpdatedAt(jsTime()); - collType.setKeyPattern(shardKeyPattern.toBSON()); - collType.setUnique(false); - - ChunkManager manager(kNss, version.epoch(), shardKeyPattern, nullptr, true); - - auto future = - launchAsync([&] { manager.loadExistingRanges(operationContext(), initialManager.get()); }); - - // Return set of chunks, which represent a split - expectFindOnConfigSendBSONObjVector([&]() { - version.incMajor(); - - ChunkType chunk1; - chunk1.setNS(kNss.ns()); - chunk1.setMin(shardKeyPattern.getKeyPattern().globalMin()); - chunk1.setMax(BSON("_id" << 0)); - chunk1.setShard({"0"}); - chunk1.setVersion(version); - - version.incMinor(); - - ChunkType chunk2; - chunk2.setNS(kNss.ns()); - chunk2.setMin(BSON("_id" << 0)); - chunk2.setMax(shardKeyPattern.getKeyPattern().globalMax()); - chunk2.setShard({"0"}); - chunk2.setVersion(version); - - return std::vector<BSONObj>{chunk1.toBSON(), chunk2.toBSON()}; - }()); - - future.timed_get(kFutureTimeout); -} - -/** - * Fixture to be used as a shortcut for tests which exercise the getShardIdsForQuery routing logic - */ class ChunkManagerQueryTest : public ChunkManagerTestFixture { protected: void runQueryTest(const BSONObj& shardKey, diff --git a/src/mongo/s/chunk_manager_refresh_test.cpp b/src/mongo/s/chunk_manager_refresh_test.cpp new file mode 100644 index 00000000000..511a4ed15d9 --- /dev/null +++ b/src/mongo/s/chunk_manager_refresh_test.cpp @@ -0,0 +1,195 @@ +/** + * Copyright (C) 2017 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kDefault + +#include "mongo/platform/basic.h" + +#include <set> + +#include "mongo/db/client.h" +#include "mongo/s/catalog/type_chunk.h" +#include "mongo/s/catalog/type_collection.h" +#include "mongo/s/catalog/type_shard.h" +#include "mongo/s/catalog_cache.h" +#include "mongo/s/chunk_manager_test_fixture.h" +#include "mongo/util/scopeguard.h" + +namespace mongo { +namespace { + +using ChunkManagerLoadTest = ChunkManagerTestFixture; + +TEST_F(ChunkManagerLoadTest, IncrementalLoadAfterSplit) { + const ShardKeyPattern shardKeyPattern(BSON("_id" << 1)); + + auto initialRoutingInfo(makeChunkManager(shardKeyPattern, nullptr, true, {})); + ASSERT_EQ(1, initialRoutingInfo->numChunks()); + + auto future = launchAsync([&] { + auto client = serviceContext()->makeClient("Test"); + auto opCtx = client->makeOperationContext(); + return CatalogCache::refreshCollectionRoutingInfo(opCtx.get(), kNss, initialRoutingInfo); + }); + + ChunkVersion version = initialRoutingInfo->getVersion(); + + expectFindOnConfigSendBSONObjVector([&]() { + CollectionType collType; + collType.setNs(kNss); + collType.setEpoch(version.epoch()); + collType.setKeyPattern(shardKeyPattern.toBSON()); + collType.setUnique(false); + + return std::vector<BSONObj>{collType.toBSON()}; + }()); + + // Return set of chunks, which represent a split + expectFindOnConfigSendBSONObjVector([&]() { + version.incMajor(); + ChunkType chunk1( + kNss, {shardKeyPattern.getKeyPattern().globalMin(), BSON("_id" << 0)}, version, {"0"}); + + version.incMinor(); + ChunkType chunk2( + kNss, {BSON("_id" << 0), shardKeyPattern.getKeyPattern().globalMax()}, version, {"0"}); + + return std::vector<BSONObj>{chunk1.toBSON(), chunk2.toBSON()}; + }()); + + auto newRoutingInfo(future.timed_get(kFutureTimeout)); + ASSERT_EQ(2, newRoutingInfo->numChunks()); + ASSERT_EQ(version, newRoutingInfo->getVersion()); + ASSERT_EQ(version, newRoutingInfo->getVersion({"0"})); + 
ASSERT_EQ(ChunkVersion(0, 0, version.epoch()), newRoutingInfo->getVersion({"1"})); +} + +TEST_F(ChunkManagerLoadTest, IncrementalLoadAfterMove) { + const ShardKeyPattern shardKeyPattern(BSON("_id" << 1)); + + auto initialRoutingInfo(makeChunkManager(shardKeyPattern, nullptr, true, {BSON("_id" << 0)})); + ASSERT_EQ(2, initialRoutingInfo->numChunks()); + + auto future = launchAsync([&] { + auto client = serviceContext()->makeClient("Test"); + auto opCtx = client->makeOperationContext(); + return CatalogCache::refreshCollectionRoutingInfo(opCtx.get(), kNss, initialRoutingInfo); + }); + + ChunkVersion version = initialRoutingInfo->getVersion(); + + expectFindOnConfigSendBSONObjVector([&]() { + CollectionType collType; + collType.setNs(kNss); + collType.setEpoch(version.epoch()); + collType.setKeyPattern(shardKeyPattern.toBSON()); + collType.setUnique(false); + + return std::vector<BSONObj>{collType.toBSON()}; + }()); + + ChunkVersion expectedDestShardVersion; + + // Return set of chunks, which represent a move + expectFindOnConfigSendBSONObjVector([&]() { + version.incMajor(); + expectedDestShardVersion = version; + ChunkType chunk1( + kNss, {shardKeyPattern.getKeyPattern().globalMin(), BSON("_id" << 0)}, version, {"1"}); + + version.incMinor(); + ChunkType chunk2( + kNss, {BSON("_id" << 0), shardKeyPattern.getKeyPattern().globalMax()}, version, {"0"}); + + return std::vector<BSONObj>{chunk1.toBSON(), chunk2.toBSON()}; + }()); + + auto newRoutingInfo(future.timed_get(kFutureTimeout)); + ASSERT_EQ(2, newRoutingInfo->numChunks()); + ASSERT_EQ(version, newRoutingInfo->getVersion()); + ASSERT_EQ(version, newRoutingInfo->getVersion({"0"})); + ASSERT_EQ(expectedDestShardVersion, newRoutingInfo->getVersion({"1"})); +} + +TEST_F(ChunkManagerLoadTest, IncrementalLoadAfterMoveLastChunk) { + const ShardKeyPattern shardKeyPattern(BSON("_id" << 1)); + + auto initialRoutingInfo(makeChunkManager(shardKeyPattern, nullptr, true, {})); + ASSERT_EQ(1, initialRoutingInfo->numChunks()); + 
+ auto future = launchAsync([&] { + auto client = serviceContext()->makeClient("Test"); + auto opCtx = client->makeOperationContext(); + return CatalogCache::refreshCollectionRoutingInfo(opCtx.get(), kNss, initialRoutingInfo); + }); + + ChunkVersion version = initialRoutingInfo->getVersion(); + + expectFindOnConfigSendBSONObjVector([&]() { + CollectionType collType; + collType.setNs(kNss); + collType.setEpoch(version.epoch()); + collType.setKeyPattern(shardKeyPattern.toBSON()); + collType.setUnique(false); + + return std::vector<BSONObj>{collType.toBSON()}; + }()); + + // Return set of chunks, which represent a move + expectFindOnConfigSendBSONObjVector([&]() { + version.incMajor(); + ChunkType chunk1(kNss, + {shardKeyPattern.getKeyPattern().globalMin(), + shardKeyPattern.getKeyPattern().globalMax()}, + version, + {"1"}); + + return std::vector<BSONObj>{chunk1.toBSON()}; + }()); + + expectFindOnConfigSendBSONObjVector([&]() { + ShardType shard1; + shard1.setName("0"); + shard1.setHost(str::stream() << "Host0:12345"); + + ShardType shard2; + shard2.setName("1"); + shard2.setHost(str::stream() << "Host1:12345"); + + return std::vector<BSONObj>{shard1.toBSON(), shard2.toBSON()}; + }()); + + auto newRoutingInfo(future.timed_get(kFutureTimeout)); + ASSERT_EQ(1, newRoutingInfo->numChunks()); + ASSERT_EQ(version, newRoutingInfo->getVersion()); + ASSERT_EQ(ChunkVersion(0, 0, version.epoch()), newRoutingInfo->getVersion({"0"})); + ASSERT_EQ(version, newRoutingInfo->getVersion({"1"})); +} + +} // namespace +} // namespace mongo diff --git a/src/mongo/s/chunk_manager_test_fixture.cpp b/src/mongo/s/chunk_manager_test_fixture.cpp new file mode 100644 index 00000000000..6d8ca081bc8 --- /dev/null +++ b/src/mongo/s/chunk_manager_test_fixture.cpp @@ -0,0 +1,122 @@ +/** + * Copyright (C) 2017 MongoDB Inc. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kDefault + +#include "mongo/platform/basic.h" + +#include <set> +#include <vector> + +#include "mongo/s/chunk_manager_test_fixture.h" + +#include "mongo/client/remote_command_targeter_mock.h" +#include "mongo/db/client.h" +#include "mongo/db/query/collation/collator_factory_mock.h" +#include "mongo/s/catalog/type_chunk.h" +#include "mongo/s/catalog/type_collection.h" +#include "mongo/s/catalog/type_shard.h" +#include "mongo/s/catalog_cache.h" +#include "mongo/stdx/memory.h" +#include "mongo/util/scopeguard.h" + +namespace mongo { + +const NamespaceString ChunkManagerTestFixture::kNss("TestDB", "TestColl"); + +void ChunkManagerTestFixture::setUp() { + ShardingCatalogTestFixture::setUp(); + setRemote(HostAndPort("FakeRemoteClient:34567")); + configTargeter()->setFindHostReturnValue(HostAndPort{CONFIG_HOST_PORT}); + + CollatorFactoryInterface::set(serviceContext(), stdx::make_unique<CollatorFactoryMock>()); +} + +std::shared_ptr<ChunkManager> ChunkManagerTestFixture::makeChunkManager( + const ShardKeyPattern& shardKeyPattern, + std::unique_ptr<CollatorInterface> defaultCollator, + bool unique, + const std::vector<BSONObj>& splitPoints) { + ChunkVersion version(1, 0, OID::gen()); + + const BSONObj collectionBSON = [&]() { + CollectionType coll; + coll.setNs(kNss); + coll.setEpoch(version.epoch()); + coll.setKeyPattern(shardKeyPattern.getKeyPattern()); + coll.setUnique(unique); + + if (defaultCollator) { + coll.setDefaultCollation(defaultCollator->getSpec().toBSON()); + } + + return coll.toBSON(); + }(); + + std::vector<BSONObj> shards; + std::vector<BSONObj> initialChunks; + + auto splitPointsIncludingEnds(splitPoints); + splitPointsIncludingEnds.insert(splitPointsIncludingEnds.begin(), + shardKeyPattern.getKeyPattern().globalMin()); + splitPointsIncludingEnds.push_back(shardKeyPattern.getKeyPattern().globalMax()); + + for (size_t i = 1; i < splitPointsIncludingEnds.size(); ++i) { + ShardType 
shard; + shard.setName(str::stream() << (i - 1)); + shard.setHost(str::stream() << "Host" << (i - 1) << ":12345"); + + shards.push_back(shard.toBSON()); + + ChunkType chunk( + kNss, + {shardKeyPattern.getKeyPattern().extendRangeBound(splitPointsIncludingEnds[i - 1], + false), + shardKeyPattern.getKeyPattern().extendRangeBound(splitPointsIncludingEnds[i], false)}, + version, + shard.getName()); + + initialChunks.push_back(chunk.toBSON()); + + version.incMajor(); + } + + auto future = launchAsync([&] { + auto client = serviceContext()->makeClient("Test"); + auto opCtx = client->makeOperationContext(); + return CatalogCache::refreshCollectionRoutingInfo(opCtx.get(), kNss, nullptr); + }); + + expectFindOnConfigSendBSONObjVector({collectionBSON}); + expectFindOnConfigSendBSONObjVector(initialChunks); + expectFindOnConfigSendBSONObjVector(shards); + + return future.timed_get(kFutureTimeout); +} + +} // namespace mongo diff --git a/src/mongo/s/chunk_manager_test_fixture.h b/src/mongo/s/chunk_manager_test_fixture.h new file mode 100644 index 00000000000..aaa059dd49d --- /dev/null +++ b/src/mongo/s/chunk_manager_test_fixture.h @@ -0,0 +1,62 @@ +/** + * Copyright (C) 2017 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include <vector> + +#include "mongo/db/namespace_string.h" +#include "mongo/s/catalog/sharding_catalog_test_fixture.h" +#include "mongo/stdx/memory.h" + +namespace mongo { + +class BSONObj; +class ChunkManager; +class CollatorInterface; +class ShardKeyPattern; + +class ChunkManagerTestFixture : public ShardingCatalogTestFixture { +protected: + void setUp() override; + + /** + * Returns a chunk manager with chunks at the specified split points. Each individual chunk is + * placed on a separate shard with shard id being a single number ranging from "0" to the number + * of chunks. 
+ */ + std::shared_ptr<ChunkManager> makeChunkManager( + const ShardKeyPattern& shardKeyPattern, + std::unique_ptr<CollatorInterface> defaultCollator, + bool unique, + const std::vector<BSONObj>& splitPoints); + + static const NamespaceString kNss; +}; + +} // namespace mongo diff --git a/src/mongo/s/client/parallel.cpp b/src/mongo/s/client/parallel.cpp index 1b16dee2032..9e2aaaaf4e5 100644 --- a/src/mongo/s/client/parallel.cpp +++ b/src/mongo/s/client/parallel.cpp @@ -39,11 +39,9 @@ #include "mongo/db/bson/dotted_path_support.h" #include "mongo/db/query/query_request.h" #include "mongo/s/catalog_cache.h" -#include "mongo/s/chunk_manager.h" #include "mongo/s/client/shard_connection.h" #include "mongo/s/client/shard_registry.h" #include "mongo/s/grid.h" -#include "mongo/s/sharding_raii.h" #include "mongo/s/stale_exception.h" #include "mongo/util/log.h" #include "mongo/util/net/socket_exception.h" @@ -211,13 +209,13 @@ ParallelSortClusteredCursor::~ParallelSortClusteredCursor() { _done = true; } -void ParallelSortClusteredCursor::init(OperationContext* txn) { +void ParallelSortClusteredCursor::init(OperationContext* opCtx) { if (_didInit) return; _didInit = true; if (!_qSpec.isEmpty()) { - fullInit(txn); + fullInit(opCtx); } else { // You can only get here by using the legacy constructor // TODO: Eliminate this @@ -316,51 +314,29 @@ void ParallelSortClusteredCursor::_finishCons() { 17306, "have to have all text meta sort keys in projection", textMetaSortKeyFields.empty()); } -void ParallelSortClusteredCursor::fullInit(OperationContext* txn) { - startInit(txn); - finishInit(txn); +void ParallelSortClusteredCursor::fullInit(OperationContext* opCtx) { + startInit(opCtx); + finishInit(opCtx); } -void ParallelSortClusteredCursor::_markStaleNS(OperationContext* txn, - const NamespaceString& staleNS, - const StaleConfigException& e, - bool& forceReload) { - if (e.requiresFullReload()) { - Grid::get(txn)->catalogCache()->invalidate(staleNS.db()); - } - - if 
(_staleNSMap.find(staleNS.ns()) == _staleNSMap.end()) +void ParallelSortClusteredCursor::_markStaleNS(const NamespaceString& staleNS, + const StaleConfigException& e) { + if (_staleNSMap.find(staleNS.ns()) == _staleNSMap.end()) { _staleNSMap[staleNS.ns()] = 1; + } - int tries = ++_staleNSMap[staleNS.ns()]; + const int tries = ++_staleNSMap[staleNS.ns()]; if (tries >= 5) { throw SendStaleConfigException(staleNS.ns(), - str::stream() << "too many retries of stale version info", + "too many retries of stale version info", e.getVersionReceived(), e.getVersionWanted()); } - - forceReload = tries > 2; -} - -void ParallelSortClusteredCursor::_handleStaleNS(OperationContext* txn, - const NamespaceString& staleNS, - bool forceReload) { - auto scopedCMStatus = ScopedChunkManager::get(txn, staleNS); - if (!scopedCMStatus.isOK()) { - log() << "cannot reload database info for stale namespace " << staleNS.ns(); - return; - } - - const auto& scopedCM = scopedCMStatus.getValue(); - - // Reload chunk manager, potentially forcing the namespace - scopedCM.db()->getChunkManagerIfExists(txn, staleNS.ns(), true, forceReload); } void ParallelSortClusteredCursor::setupVersionAndHandleSlaveOk( - OperationContext* txn, + OperationContext* opCtx, std::shared_ptr<ParallelConnectionState> state, const ShardId& shardId, std::shared_ptr<Shard> primary, @@ -377,7 +353,8 @@ void ParallelSortClusteredCursor::setupVersionAndHandleSlaveOk( // Setup conn if (!state->conn) { - const auto shard = uassertStatusOK(Grid::get(txn)->shardRegistry()->getShard(txn, shardId)); + const auto shard = + uassertStatusOK(Grid::get(opCtx)->shardRegistry()->getShard(opCtx, shardId)); state->conn.reset(new ShardConnection(shard->getConnString(), ns.ns(), manager)); } @@ -440,7 +417,7 @@ void ParallelSortClusteredCursor::setupVersionAndHandleSlaveOk( } } -void ParallelSortClusteredCursor::startInit(OperationContext* txn) { +void ParallelSortClusteredCursor::startInit(OperationContext* opCtx) { const bool returnPartial = 
(_qSpec.options() & QueryOption_PartialResults); const NamespaceString nss(!_cInfo.isEmpty() ? _cInfo.versionedNS : _qSpec.ns()); @@ -458,12 +435,12 @@ void ParallelSortClusteredCursor::startInit(OperationContext* txn) { shared_ptr<Shard> primary; { - auto scopedCMStatus = ScopedChunkManager::get(txn, nss); - if (scopedCMStatus != ErrorCodes::NamespaceNotFound) { - uassertStatusOK(scopedCMStatus.getStatus()); - const auto& scopedCM = scopedCMStatus.getValue(); - manager = scopedCM.cm(); - primary = scopedCM.primary(); + auto routingInfoStatus = + Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfo(opCtx, nss); + if (routingInfoStatus != ErrorCodes::NamespaceNotFound) { + auto routingInfo = uassertStatusOK(std::move(routingInfoStatus)); + manager = routingInfo.cm(); + primary = routingInfo.primary(); } } @@ -476,7 +453,7 @@ void ParallelSortClusteredCursor::startInit(OperationContext* txn) { << manager->getVersion().toString() << "]"; } - manager->getShardIdsForQuery(txn, + manager->getShardIdsForQuery(opCtx, !_cInfo.isEmpty() ? _cInfo.cmdFilter : _qSpec.filter(), !_cInfo.isEmpty() ? 
_cInfo.cmdCollation : BSONObj(), &shardIds); @@ -551,7 +528,7 @@ void ParallelSortClusteredCursor::startInit(OperationContext* txn) { mdata.pcState = std::make_shared<ParallelConnectionState>(); auto state = mdata.pcState; - setupVersionAndHandleSlaveOk(txn, state, shardId, primary, nss, vinfo, manager); + setupVersionAndHandleSlaveOk(opCtx, state, shardId, primary, nss, vinfo, manager); const string& ns = _qSpec.ns(); @@ -641,23 +618,20 @@ void ParallelSortClusteredCursor::startInit(OperationContext* txn) { if (staleNS.size() == 0) staleNS = nss; // ns is the *versioned* namespace, be careful of this - // Probably need to retry fully - bool forceReload; - _markStaleNS(txn, staleNS, e, forceReload); + _markStaleNS(staleNS, e); + Grid::get(opCtx)->catalogCache()->invalidateShardedCollection(staleNS); - LOG(1) << "stale config of ns " << staleNS - << " during initialization, will retry with forced : " << forceReload + LOG(1) << "stale config of ns " << staleNS << " during initialization, will retry" << causedBy(redact(e)); // This is somewhat strange - if (staleNS != nss) + if (staleNS != nss) { warning() << "versioned ns " << nss.ns() << " doesn't match stale config namespace " << staleNS; - - _handleStaleNS(txn, staleNS, forceReload); + } // Restart with new chunk manager - startInit(txn); + startInit(opCtx); return; } catch (SocketException& e) { warning() << "socket exception when initializing on " << shardId @@ -727,7 +701,7 @@ void ParallelSortClusteredCursor::startInit(OperationContext* txn) { } } -void ParallelSortClusteredCursor::finishInit(OperationContext* txn) { +void ParallelSortClusteredCursor::finishInit(OperationContext* opCtx) { bool returnPartial = (_qSpec.options() & QueryOption_PartialResults); bool specialVersion = _cInfo.versionedNS.size() > 0; string ns = specialVersion ? 
_cInfo.versionedNS : _qSpec.ns(); @@ -859,32 +833,27 @@ void ParallelSortClusteredCursor::finishInit(OperationContext* txn) { if (retry) { // Refresh stale namespaces if (staleNSExceptions.size()) { - for (map<string, StaleConfigException>::iterator i = staleNSExceptions.begin(), - end = staleNSExceptions.end(); - i != end; - ++i) { - NamespaceString staleNS(i->first); - const StaleConfigException& exception = i->second; + for (const auto& exEntry : staleNSExceptions) { + const NamespaceString staleNS(exEntry.first); + const StaleConfigException& ex = exEntry.second; - bool forceReload; - _markStaleNS(txn, staleNS, exception, forceReload); + _markStaleNS(staleNS, ex); + Grid::get(opCtx)->catalogCache()->invalidateShardedCollection(staleNS); - LOG(1) << "stale config of ns " << staleNS - << " on finishing query, will retry with forced : " << forceReload - << causedBy(redact(exception)); + LOG(1) << "stale config of ns " << staleNS << " on finishing query, will retry" + << causedBy(redact(ex)); // This is somewhat strange - if (staleNS != ns) + if (staleNS != ns) { warning() << "versioned ns " << ns << " doesn't match stale config namespace " << staleNS; - - _handleStaleNS(txn, staleNS, forceReload); + } } } // Re-establish connections we need to - startInit(txn); - finishInit(txn); + startInit(opCtx); + finishInit(opCtx); return; } @@ -924,7 +893,8 @@ void ParallelSortClusteredCursor::finishInit(OperationContext* txn) { _cursors[index].reset(mdata.pcState->cursor.get(), &mdata); - const auto shard = uassertStatusOK(Grid::get(txn)->shardRegistry()->getShard(txn, shardId)); + const auto shard = + uassertStatusOK(Grid::get(opCtx)->shardRegistry()->getShard(opCtx, shardId)); _servers.insert(shard->getConnString().toString()); index++; diff --git a/src/mongo/s/client/parallel.h b/src/mongo/s/client/parallel.h index aeb709f0ffd..4167d6e91f0 100644 --- a/src/mongo/s/client/parallel.h +++ b/src/mongo/s/client/parallel.h @@ -117,11 +117,7 @@ private: void _finishCons(); - 
void _markStaleNS(OperationContext* txn, - const NamespaceString& staleNS, - const StaleConfigException& e, - bool& forceReload); - void _handleStaleNS(OperationContext* txn, const NamespaceString& staleNS, bool forceReload); + void _markStaleNS(const NamespaceString& staleNS, const StaleConfigException& e); bool _didInit; bool _done; diff --git a/src/mongo/s/client/version_manager.cpp b/src/mongo/s/client/version_manager.cpp index 2e6a35f5e09..023eaa7bfc9 100644 --- a/src/mongo/s/client/version_manager.cpp +++ b/src/mongo/s/client/version_manager.cpp @@ -36,13 +36,13 @@ #include "mongo/db/namespace_string.h" #include "mongo/s/catalog/sharding_catalog_client.h" #include "mongo/s/catalog_cache.h" +#include "mongo/s/catalog_cache.h" #include "mongo/s/chunk_version.h" #include "mongo/s/client/shard_connection.h" #include "mongo/s/client/shard_registry.h" #include "mongo/s/grid.h" #include "mongo/s/mongos_options.h" #include "mongo/s/set_shard_version_request.h" -#include "mongo/s/sharding_raii.h" #include "mongo/s/stale_exception.h" #include "mongo/util/log.h" @@ -107,7 +107,7 @@ private: /** * Sends the setShardVersion command on the specified connection. 
*/ -bool setShardVersion(OperationContext* txn, +bool setShardVersion(OperationContext* opCtx, DBClientBase* conn, const string& ns, const ConnectionString& configServer, @@ -174,7 +174,7 @@ DBClientBase* getVersionable(DBClientBase* conn) { * Eventually this should go completely away, but for now many commands rely on unversioned but * mongos-specific behavior on mongod (auditing and replication information in commands) */ -bool initShardVersionEmptyNS(OperationContext* txn, DBClientBase* conn_in) { +bool initShardVersionEmptyNS(OperationContext* opCtx, DBClientBase* conn_in) { try { // May throw if replica set primary is down DBClientBase* const conn = getVersionable(conn_in); @@ -187,7 +187,7 @@ bool initShardVersionEmptyNS(OperationContext* txn, DBClientBase* conn_in) { } BSONObj result; - const bool ok = setShardVersion(txn, + const bool ok = setShardVersion(opCtx, conn, "", grid.shardRegistry()->getConfigServerConnectionString(), @@ -241,7 +241,7 @@ bool initShardVersionEmptyNS(OperationContext* txn, DBClientBase* conn_in) { * * @return true if we contacted the remote host */ -bool checkShardVersion(OperationContext* txn, +bool checkShardVersion(OperationContext* opCtx, DBClientBase* conn_in, const string& ns, shared_ptr<ChunkManager> refManager, @@ -249,7 +249,7 @@ bool checkShardVersion(OperationContext* txn, int tryNumber) { // Empty namespaces are special - we require initialization but not versioning if (ns.size() == 0) { - return initShardVersionEmptyNS(txn, conn_in); + return initShardVersionEmptyNS(opCtx, conn_in); } DBClientBase* const conn = getVersionable(conn_in); @@ -257,21 +257,21 @@ bool checkShardVersion(OperationContext* txn, const NamespaceString nss(ns); + auto const catalogCache = Grid::get(opCtx)->catalogCache(); + if (authoritative) { - ScopedChunkManager::refreshAndGet(txn, nss); + Grid::get(opCtx)->catalogCache()->invalidateShardedCollection(nss); } - auto scopedCMStatus = ScopedChunkManager::get(txn, nss); - - if 
(!scopedCMStatus.isOK()) { + auto routingInfoStatus = catalogCache->getCollectionRoutingInfo(opCtx, nss); + if (!routingInfoStatus.isOK()) { return false; } - const auto& scopedCM = scopedCMStatus.getValue(); + auto& routingInfo = routingInfoStatus.getValue(); - auto conf = scopedCM.db(); - const auto manager = scopedCM.cm(); - const auto primary = scopedCM.primary(); + const auto manager = routingInfo.cm(); + const auto primary = routingInfo.primary(); unsigned long long officialSequenceNumber = 0; @@ -283,7 +283,7 @@ bool checkShardVersion(OperationContext* txn, return false; } - const auto shardRegistry = Grid::get(txn)->shardRegistry(); + const auto shardRegistry = Grid::get(opCtx)->shardRegistry(); const auto shard = shardRegistry->getShardForHostNoReload( uassertStatusOK(HostAndPort::parse(conn->getServerAddress()))); @@ -350,7 +350,7 @@ bool checkShardVersion(OperationContext* txn, << ", current chunk manager iteration is " << officialSequenceNumber; BSONObj result; - if (setShardVersion(txn, + if (setShardVersion(opCtx, conn, ns, shardRegistry->getConfigServerConnectionString(), @@ -375,20 +375,11 @@ bool checkShardVersion(OperationContext* txn, if (!authoritative) { // use the original connection and get a fresh versionable connection // since conn can be invalidated (or worse, freed) after the failure - checkShardVersion(txn, conn_in, ns, refManager, 1, tryNumber + 1); + checkShardVersion(opCtx, conn_in, ns, refManager, 1, tryNumber + 1); return true; } - if (result["reloadConfig"].trueValue()) { - if (result["version"].timestampTime() == Date_t()) { - warning() << "reloading full configuration for " << conf->name() - << ", connection state indicates significant version changes"; - - Grid::get(txn)->catalogCache()->invalidate(nss.db()); - } - - conf->getChunkManager(txn, nss.ns(), true); - } + Grid::get(opCtx)->catalogCache()->onStaleConfigError(std::move(routingInfo)); const int maxNumTries = 7; if (tryNumber < maxNumTries) { @@ -397,7 +388,7 @@ bool 
checkShardVersion(OperationContext* txn, sleepmillis(10 * tryNumber); // use the original connection and get a fresh versionable connection // since conn can be invalidated (or worse, freed) after the failure - checkShardVersion(txn, conn_in, ns, refManager, true, tryNumber + 1); + checkShardVersion(opCtx, conn_in, ns, refManager, true, tryNumber + 1); return true; } @@ -426,20 +417,20 @@ bool VersionManager::isVersionableCB(DBClientBase* conn) { return conn->type() == ConnectionString::MASTER || conn->type() == ConnectionString::SET; } -bool VersionManager::checkShardVersionCB(OperationContext* txn, +bool VersionManager::checkShardVersionCB(OperationContext* opCtx, DBClientBase* conn_in, const string& ns, bool authoritative, int tryNumber) { - return checkShardVersion(txn, conn_in, ns, nullptr, authoritative, tryNumber); + return checkShardVersion(opCtx, conn_in, ns, nullptr, authoritative, tryNumber); } -bool VersionManager::checkShardVersionCB(OperationContext* txn, +bool VersionManager::checkShardVersionCB(OperationContext* opCtx, ShardConnection* conn_in, bool authoritative, int tryNumber) { return checkShardVersion( - txn, conn_in->get(), conn_in->getNS(), conn_in->getManager(), authoritative, tryNumber); + opCtx, conn_in->get(), conn_in->getNS(), conn_in->getManager(), authoritative, tryNumber); } } // namespace mongo diff --git a/src/mongo/s/commands/SConscript b/src/mongo/s/commands/SConscript index 300d0c1b973..800f8fe6005 100644 --- a/src/mongo/s/commands/SConscript +++ b/src/mongo/s/commands/SConscript @@ -26,6 +26,7 @@ env.Library( 'cluster_apply_ops_cmd.cpp', 'cluster_available_query_options_cmd.cpp', 'cluster_commands_common.cpp', + 'cluster_compact_cmd.cpp', 'cluster_control_balancer_cmd.cpp', 'cluster_count_cmd.cpp', 'cluster_current_op.cpp', diff --git a/src/mongo/s/commands/chunk_manager_targeter.cpp b/src/mongo/s/commands/chunk_manager_targeter.cpp index 9dfef97dfda..d490bfb9c58 100644 --- a/src/mongo/s/commands/chunk_manager_targeter.cpp +++ 
b/src/mongo/s/commands/chunk_manager_targeter.cpp @@ -32,30 +32,18 @@ #include "mongo/s/commands/chunk_manager_targeter.h" -#include <boost/thread/tss.hpp> - #include "mongo/db/matcher/extensions_callback_noop.h" #include "mongo/db/operation_context.h" #include "mongo/db/query/canonical_query.h" #include "mongo/db/query/collation/collation_index_key.h" -#include "mongo/s/catalog_cache.h" -#include "mongo/s/chunk.h" #include "mongo/s/client/shard_registry.h" +#include "mongo/s/commands/cluster_commands_common.h" #include "mongo/s/grid.h" #include "mongo/s/shard_key_pattern.h" -#include "mongo/s/sharding_raii.h" #include "mongo/util/log.h" #include "mongo/util/mongoutils/str.h" namespace mongo { - -using std::shared_ptr; -using str::stream; -using std::map; -using std::set; -using std::string; -using std::vector; - namespace { enum UpdateType { UpdateType_Replacement, UpdateType_OpStyle, UpdateType_Unknown }; @@ -64,11 +52,6 @@ enum CompareResult { CompareResult_Unknown, CompareResult_GTE, CompareResult_LT const ShardKeyPattern virtualIdShardKey(BSON("_id" << 1)); -// To match legacy reload behavior, we have to backoff on config reload per-thread -// TODO: Centralize this behavior better by refactoring config reload in mongos -boost::thread_specific_ptr<Backoff> perThreadBackoff; -const int maxWaitMillis = 500; - /** * There are two styles of update expressions: * @@ -117,7 +100,7 @@ UpdateType getUpdateExprType(const BSONObj& updateExpr) { * { _id : { $lt : 30 } } => false * { foo : <anything> } => false */ -bool isExactIdQuery(OperationContext* txn, const CanonicalQuery& query, ChunkManager* manager) { +bool isExactIdQuery(OperationContext* opCtx, const CanonicalQuery& query, ChunkManager* manager) { auto shardKey = virtualIdShardKey.extractShardKeyFromQuery(query); BSONElement idElt = shardKey["_id"]; @@ -137,15 +120,6 @@ bool isExactIdQuery(OperationContext* txn, const CanonicalQuery& query, ChunkMan return true; } -void refreshBackoff() { - if 
(!perThreadBackoff.get()) { - perThreadBackoff.reset(new Backoff(maxWaitMillis, maxWaitMillis * 2)); - } - - perThreadBackoff.get()->nextSleepMillis(); -} - - // // Utilities to compare shard versions // @@ -172,25 +146,19 @@ CompareResult compareShardVersions(const ChunkVersion& shardVersionA, return CompareResult_Unknown; } - if (shardVersionA < shardVersionB) { + if (shardVersionA < shardVersionB) return CompareResult_LT; - } - else return CompareResult_GTE; } -ChunkVersion getShardVersion(StringData shardName, - const ChunkManager* manager, - const Shard* primary) { - dassert(!(manager && primary)); - dassert(manager || primary); - - if (primary) { - return ChunkVersion::UNSHARDED(); +ChunkVersion getShardVersion(const CachedCollectionRoutingInfo& routingInfo, + const ShardId& shardId) { + if (routingInfo.cm()) { + return routingInfo.cm()->getVersion(shardId); } - return manager->getVersion(shardName.toString()); + return ChunkVersion::UNSHARDED(); } /** @@ -204,26 +172,21 @@ ChunkVersion getShardVersion(StringData shardName, * Note that the signature here is weird since our cached map of chunk versions is stored in a * ChunkManager or is implicit in the primary shard of the collection. 
*/ -CompareResult compareAllShardVersions(const ChunkManager* cachedChunkManager, - const Shard* cachedPrimary, - const map<ShardId, ChunkVersion>& remoteShardVersions) { +CompareResult compareAllShardVersions(const CachedCollectionRoutingInfo& routingInfo, + const ShardVersionMap& remoteShardVersions) { CompareResult finalResult = CompareResult_GTE; - for (map<ShardId, ChunkVersion>::const_iterator it = remoteShardVersions.begin(); - it != remoteShardVersions.end(); - ++it) { - // Get the remote and cached version for the next shard - const ShardId& shardName = it->first; - const ChunkVersion& remoteShardVersion = it->second; + for (const auto& shardVersionEntry : remoteShardVersions) { + const ShardId& shardId = shardVersionEntry.first; + const ChunkVersion& remoteShardVersion = shardVersionEntry.second; ChunkVersion cachedShardVersion; try { // Throws b/c shard constructor throws - cachedShardVersion = - getShardVersion(shardName.toString(), cachedChunkManager, cachedPrimary); + cachedShardVersion = getShardVersion(routingInfo, shardId); } catch (const DBException& ex) { - warning() << "could not lookup shard " << shardName + warning() << "could not lookup shard " << shardId << " in local cache, shard metadata may have changed" << " or be unavailable" << causedBy(ex); @@ -235,6 +198,7 @@ CompareResult compareAllShardVersions(const ChunkManager* cachedChunkManager, if (result == CompareResult_Unknown) return result; + if (result == CompareResult_LT) finalResult = CompareResult_LT; @@ -247,10 +211,10 @@ CompareResult compareAllShardVersions(const ChunkManager* cachedChunkManager, /** * Whether or not the manager/primary pair is different from the other manager/primary pair. 
*/ -bool isMetadataDifferent(const shared_ptr<ChunkManager>& managerA, - const shared_ptr<Shard>& primaryA, - const shared_ptr<ChunkManager>& managerB, - const shared_ptr<Shard>& primaryB) { +bool isMetadataDifferent(const std::shared_ptr<ChunkManager>& managerA, + const std::shared_ptr<Shard>& primaryA, + const std::shared_ptr<ChunkManager>& managerB, + const std::shared_ptr<Shard>& primaryB) { if ((managerA && !managerB) || (!managerA && managerB) || (primaryA && !primaryB) || (!primaryA && primaryB)) return true; @@ -267,10 +231,10 @@ bool isMetadataDifferent(const shared_ptr<ChunkManager>& managerA, * Whether or not the manager/primary pair was changed or refreshed from a previous version * of the metadata. */ -bool wasMetadataRefreshed(const shared_ptr<ChunkManager>& managerA, - const shared_ptr<Shard>& primaryA, - const shared_ptr<ChunkManager>& managerB, - const shared_ptr<Shard>& primaryB) { +bool wasMetadataRefreshed(const std::shared_ptr<ChunkManager>& managerA, + const std::shared_ptr<Shard>& primaryA, + const std::shared_ptr<ChunkManager>& managerB, + const std::shared_ptr<Shard>& primaryB) { if (isMetadataDifferent(managerA, primaryA, managerB, primaryB)) return true; @@ -287,16 +251,19 @@ bool wasMetadataRefreshed(const shared_ptr<ChunkManager>& managerA, ChunkManagerTargeter::ChunkManagerTargeter(const NamespaceString& nss, TargeterStats* stats) : _nss(nss), _needsTargetingRefresh(false), _stats(stats) {} +Status ChunkManagerTargeter::init(OperationContext* opCtx) { + auto shardDbStatus = createShardDatabase(opCtx, _nss.db()); + if (!shardDbStatus.isOK()) { + return shardDbStatus.getStatus(); + } -Status ChunkManagerTargeter::init(OperationContext* txn) { - auto scopedCMStatus = ScopedChunkManager::getOrCreate(txn, _nss); - if (!scopedCMStatus.isOK()) { - return scopedCMStatus.getStatus(); + const auto routingInfoStatus = + Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfo(opCtx, _nss); + if (!routingInfoStatus.isOK()) { + return 
routingInfoStatus.getStatus(); } - const auto& scopedCM = scopedCMStatus.getValue(); - _manager = scopedCM.cm(); - _primary = scopedCM.primary(); + _routingInfo = std::move(routingInfoStatus.getValue()); return Status::OK(); } @@ -305,26 +272,26 @@ const NamespaceString& ChunkManagerTargeter::getNS() const { return _nss; } -Status ChunkManagerTargeter::targetInsert(OperationContext* txn, +Status ChunkManagerTargeter::targetInsert(OperationContext* opCtx, const BSONObj& doc, ShardEndpoint** endpoint) const { BSONObj shardKey; - if (_manager) { + if (_routingInfo->cm()) { // // Sharded collections have the following requirements for targeting: // // Inserts must contain the exact shard key. // - shardKey = _manager->getShardKeyPattern().extractShardKeyFromDoc(doc); + shardKey = _routingInfo->cm()->getShardKeyPattern().extractShardKeyFromDoc(doc); // Check shard key exists if (shardKey.isEmpty()) { - return Status(ErrorCodes::ShardKeyNotFound, - stream() << "document " << doc - << " does not contain shard key for pattern " - << _manager->getShardKeyPattern().toString()); + return {ErrorCodes::ShardKeyNotFound, + str::stream() << "document " << doc + << " does not contain shard key for pattern " + << _routingInfo->cm()->getShardKeyPattern().toString()}; } // Check shard key size on insert @@ -337,21 +304,21 @@ Status ChunkManagerTargeter::targetInsert(OperationContext* txn, if (!shardKey.isEmpty()) { *endpoint = targetShardKey(shardKey, CollationSpec::kSimpleSpec, doc.objsize()).release(); } else { - if (!_primary) { + if (!_routingInfo->primary()) { return Status(ErrorCodes::NamespaceNotFound, str::stream() << "could not target insert in collection " << getNS().ns() << "; no metadata found"); } - *endpoint = new ShardEndpoint(_primary->getId(), ChunkVersion::UNSHARDED()); + *endpoint = new ShardEndpoint(_routingInfo->primary()->getId(), ChunkVersion::UNSHARDED()); } return Status::OK(); } -Status ChunkManagerTargeter::targetUpdate(OperationContext* txn, +Status 
ChunkManagerTargeter::targetUpdate(OperationContext* opCtx, const BatchedUpdateDocument& updateDoc, - vector<ShardEndpoint*>* endpoints) const { + std::vector<ShardEndpoint*>* endpoints) const { // // Update targeting may use either the query or the update. This is to support save-style // updates, of the form: @@ -374,14 +341,14 @@ Status ChunkManagerTargeter::targetUpdate(OperationContext* txn, UpdateType updateType = getUpdateExprType(updateDoc.getUpdateExpr()); if (updateType == UpdateType_Unknown) { - return Status(ErrorCodes::UnsupportedFormat, - stream() << "update document " << updateExpr - << " has mixed $operator and non-$operator style fields"); + return {ErrorCodes::UnsupportedFormat, + str::stream() << "update document " << updateExpr + << " has mixed $operator and non-$operator style fields"}; } BSONObj shardKey; - if (_manager) { + if (_routingInfo->cm()) { // // Sharded collections have the following futher requirements for targeting: // @@ -393,7 +360,7 @@ Status ChunkManagerTargeter::targetUpdate(OperationContext* txn, if (updateType == UpdateType_OpStyle) { // Target using the query StatusWith<BSONObj> status = - _manager->getShardKeyPattern().extractShardKeyFromQuery(txn, query); + _routingInfo->cm()->getShardKeyPattern().extractShardKeyFromQuery(opCtx, query); // Bad query if (!status.isOK()) @@ -402,7 +369,7 @@ Status ChunkManagerTargeter::targetUpdate(OperationContext* txn, shardKey = status.getValue(); } else { // Target using the replacement document - shardKey = _manager->getShardKeyPattern().extractShardKeyFromDoc(updateExpr); + shardKey = _routingInfo->cm()->getShardKeyPattern().extractShardKeyFromDoc(updateExpr); } // Check shard key size on upsert. @@ -430,13 +397,13 @@ Status ChunkManagerTargeter::targetUpdate(OperationContext* txn, // We failed to target a single shard. // Upserts are required to target a single shard. 
- if (_manager && updateDoc.getUpsert()) { + if (_routingInfo->cm() && updateDoc.getUpsert()) { return Status(ErrorCodes::ShardKeyNotFound, str::stream() << "An upsert on a sharded collection must contain the shard " "key and have the simple collation. Update request: " << updateDoc.toBSON() << ", shard key pattern: " - << _manager->getShardKeyPattern().toString()); + << _routingInfo->cm()->getShardKeyPattern().toString()); } // Parse update query. @@ -445,7 +412,7 @@ Status ChunkManagerTargeter::targetUpdate(OperationContext* txn, if (!collation.isEmpty()) { qr->setCollation(collation); } - auto cq = CanonicalQuery::canonicalize(txn, std::move(qr), ExtensionsCallbackNoop()); + auto cq = CanonicalQuery::canonicalize(opCtx, std::move(qr), ExtensionsCallbackNoop()); if (!cq.isOK()) { return Status(cq.getStatus().code(), str::stream() << "Could not parse update query " << updateDoc.getQuery() @@ -453,7 +420,8 @@ Status ChunkManagerTargeter::targetUpdate(OperationContext* txn, } // Single (non-multi) updates must target a single shard or be exact-ID. 
- if (_manager && !updateDoc.getMulti() && !isExactIdQuery(txn, *cq.getValue(), _manager.get())) { + if (_routingInfo->cm() && !updateDoc.getMulti() && + !isExactIdQuery(opCtx, *cq.getValue(), _routingInfo->cm().get())) { return Status(ErrorCodes::ShardKeyNotFound, str::stream() << "A single update on a sharded collection must contain an exact " @@ -462,22 +430,22 @@ Status ChunkManagerTargeter::targetUpdate(OperationContext* txn, "request: " << updateDoc.toBSON() << ", shard key pattern: " - << _manager->getShardKeyPattern().toString()); + << _routingInfo->cm()->getShardKeyPattern().toString()); } if (updateType == UpdateType_OpStyle) { - return targetQuery(txn, query, collation, endpoints); + return targetQuery(opCtx, query, collation, endpoints); } else { - return targetDoc(txn, updateExpr, collation, endpoints); + return targetDoc(opCtx, updateExpr, collation, endpoints); } } -Status ChunkManagerTargeter::targetDelete(OperationContext* txn, +Status ChunkManagerTargeter::targetDelete(OperationContext* opCtx, const BatchedDeleteDocument& deleteDoc, - vector<ShardEndpoint*>* endpoints) const { + std::vector<ShardEndpoint*>* endpoints) const { BSONObj shardKey; - if (_manager) { + if (_routingInfo->cm()) { // // Sharded collections have the following further requirements for targeting: // @@ -486,7 +454,8 @@ Status ChunkManagerTargeter::targetDelete(OperationContext* txn, // Get the shard key StatusWith<BSONObj> status = - _manager->getShardKeyPattern().extractShardKeyFromQuery(txn, deleteDoc.getQuery()); + _routingInfo->cm()->getShardKeyPattern().extractShardKeyFromQuery(opCtx, + deleteDoc.getQuery()); // Bad query if (!status.isOK()) @@ -516,7 +485,7 @@ Status ChunkManagerTargeter::targetDelete(OperationContext* txn, if (!collation.isEmpty()) { qr->setCollation(collation); } - auto cq = CanonicalQuery::canonicalize(txn, std::move(qr), ExtensionsCallbackNoop()); + auto cq = CanonicalQuery::canonicalize(opCtx, std::move(qr), ExtensionsCallbackNoop()); if 
(!cq.isOK()) { return Status(cq.getStatus().code(), str::stream() << "Could not parse delete query " << deleteDoc.getQuery() @@ -524,8 +493,8 @@ Status ChunkManagerTargeter::targetDelete(OperationContext* txn, } // Single deletes must target a single shard or be exact-ID. - if (_manager && deleteDoc.getLimit() == 1 && - !isExactIdQuery(txn, *cq.getValue(), _manager.get())) { + if (_routingInfo->cm() && deleteDoc.getLimit() == 1 && + !isExactIdQuery(opCtx, *cq.getValue(), _routingInfo->cm().get())) { return Status(ErrorCodes::ShardKeyNotFound, str::stream() << "A single delete on a sharded collection must contain an exact " @@ -534,45 +503,47 @@ Status ChunkManagerTargeter::targetDelete(OperationContext* txn, "request: " << deleteDoc.toBSON() << ", shard key pattern: " - << _manager->getShardKeyPattern().toString()); + << _routingInfo->cm()->getShardKeyPattern().toString()); } - return targetQuery(txn, deleteDoc.getQuery(), collation, endpoints); + return targetQuery(opCtx, deleteDoc.getQuery(), collation, endpoints); } -Status ChunkManagerTargeter::targetDoc(OperationContext* txn, +Status ChunkManagerTargeter::targetDoc(OperationContext* opCtx, const BSONObj& doc, const BSONObj& collation, - vector<ShardEndpoint*>* endpoints) const { + std::vector<ShardEndpoint*>* endpoints) const { // NOTE: This is weird and fragile, but it's the way our language works right now - // documents are either A) invalid or B) valid equality queries over themselves. 
- return targetQuery(txn, doc, collation, endpoints); + return targetQuery(opCtx, doc, collation, endpoints); } -Status ChunkManagerTargeter::targetQuery(OperationContext* txn, +Status ChunkManagerTargeter::targetQuery(OperationContext* opCtx, const BSONObj& query, const BSONObj& collation, - vector<ShardEndpoint*>* endpoints) const { - if (!_primary && !_manager) { - return Status(ErrorCodes::NamespaceNotFound, - stream() << "could not target query in " << getNS().ns() - << "; no metadata found"); + std::vector<ShardEndpoint*>* endpoints) const { + if (!_routingInfo->primary() && !_routingInfo->cm()) { + return {ErrorCodes::NamespaceNotFound, + str::stream() << "could not target query in " << getNS().ns() + << "; no metadata found"}; } - set<ShardId> shardIds; - if (_manager) { + std::set<ShardId> shardIds; + if (_routingInfo->cm()) { try { - _manager->getShardIdsForQuery(txn, query, collation, &shardIds); + _routingInfo->cm()->getShardIdsForQuery(opCtx, query, collation, &shardIds); } catch (const DBException& ex) { return ex.toStatus(); } } else { - shardIds.insert(_primary->getId()); + shardIds.insert(_routingInfo->primary()->getId()); } for (const ShardId& shardId : shardIds) { - endpoints->push_back(new ShardEndpoint( - shardId, _manager ? _manager->getVersion(shardId) : ChunkVersion::UNSHARDED())); + endpoints->push_back(new ShardEndpoint(shardId, + _routingInfo->cm() + ? _routingInfo->cm()->getVersion(shardId) + : ChunkVersion::UNSHARDED())); } return Status::OK(); @@ -581,7 +552,7 @@ Status ChunkManagerTargeter::targetQuery(OperationContext* txn, std::unique_ptr<ShardEndpoint> ChunkManagerTargeter::targetShardKey(const BSONObj& shardKey, const BSONObj& collation, long long estDataSize) const { - auto chunk = _manager->findIntersectingChunk(shardKey, collation); + auto chunk = _routingInfo->cm()->findIntersectingChunk(shardKey, collation); // Track autosplit stats for sharded collections // Note: this is only best effort accounting and is not accurate. 
@@ -590,45 +561,48 @@ std::unique_ptr<ShardEndpoint> ChunkManagerTargeter::targetShardKey(const BSONOb } return stdx::make_unique<ShardEndpoint>(chunk->getShardId(), - _manager->getVersion(chunk->getShardId())); + _routingInfo->cm()->getVersion(chunk->getShardId())); } -Status ChunkManagerTargeter::targetCollection(vector<ShardEndpoint*>* endpoints) const { - if (!_primary && !_manager) { - return Status(ErrorCodes::NamespaceNotFound, - str::stream() << "could not target full range of " << getNS().ns() - << "; metadata not found"); +Status ChunkManagerTargeter::targetCollection(std::vector<ShardEndpoint*>* endpoints) const { + if (!_routingInfo->primary() && !_routingInfo->cm()) { + return {ErrorCodes::NamespaceNotFound, + str::stream() << "could not target full range of " << getNS().ns() + << "; metadata not found"}; } - set<ShardId> shardIds; - if (_manager) { - _manager->getAllShardIds(&shardIds); + std::set<ShardId> shardIds; + if (_routingInfo->cm()) { + _routingInfo->cm()->getAllShardIds(&shardIds); } else { - shardIds.insert(_primary->getId()); + shardIds.insert(_routingInfo->primary()->getId()); } for (const ShardId& shardId : shardIds) { - endpoints->push_back(new ShardEndpoint( - shardId, _manager ? _manager->getVersion(shardId) : ChunkVersion::UNSHARDED())); + endpoints->push_back(new ShardEndpoint(shardId, + _routingInfo->cm() + ? 
_routingInfo->cm()->getVersion(shardId) + : ChunkVersion::UNSHARDED())); } return Status::OK(); } -Status ChunkManagerTargeter::targetAllShards(vector<ShardEndpoint*>* endpoints) const { - if (!_primary && !_manager) { - return Status(ErrorCodes::NamespaceNotFound, - str::stream() << "could not target every shard with versions for " - << getNS().ns() - << "; metadata not found"); +Status ChunkManagerTargeter::targetAllShards(std::vector<ShardEndpoint*>* endpoints) const { + if (!_routingInfo->primary() && !_routingInfo->cm()) { + return {ErrorCodes::NamespaceNotFound, + str::stream() << "could not target every shard with versions for " << getNS().ns() + << "; metadata not found"}; } - vector<ShardId> shardIds; + std::vector<ShardId> shardIds; grid.shardRegistry()->getAllShardIds(&shardIds); for (const ShardId& shardId : shardIds) { - endpoints->push_back(new ShardEndpoint( - shardId, _manager ? _manager->getVersion(shardId) : ChunkVersion::UNSHARDED())); + endpoints->push_back(new ShardEndpoint(shardId, + _routingInfo->cm() + ? _routingInfo->cm()->getVersion(shardId) + : ChunkVersion::UNSHARDED())); } return Status::OK(); @@ -642,8 +616,7 @@ void ChunkManagerTargeter::noteStaleResponse(const ShardEndpoint& endpoint, if (staleInfo["vWanted"].eoo()) { // If we don't have a vWanted sent, assume the version is higher than our current // version. 
- remoteShardVersion = - getShardVersion(endpoint.shardName.toString(), _manager.get(), _primary.get()); + remoteShardVersion = getShardVersion(*_routingInfo, endpoint.shardName); remoteShardVersion.incMajor(); } else { remoteShardVersion = ChunkVersion::fromBSON(staleInfo, "vWanted"); @@ -671,7 +644,7 @@ void ChunkManagerTargeter::noteCouldNotTarget() { _needsTargetingRefresh = true; } -Status ChunkManagerTargeter::refreshIfNeeded(OperationContext* txn, bool* wasChanged) { +Status ChunkManagerTargeter::refreshIfNeeded(OperationContext* opCtx, bool* wasChanged) { bool dummy; if (!wasChanged) { wasChanged = &dummy; @@ -691,18 +664,14 @@ Status ChunkManagerTargeter::refreshIfNeeded(OperationContext* txn, bool* wasCha // Get the latest metadata information from the cache if there were issues // - shared_ptr<ChunkManager> lastManager = _manager; - shared_ptr<Shard> lastPrimary = _primary; + auto lastManager = _routingInfo->cm(); + auto lastPrimary = _routingInfo->primary(); - auto scopedCMStatus = ScopedChunkManager::getOrCreate(txn, _nss); - if (!scopedCMStatus.isOK()) { - return scopedCMStatus.getStatus(); + auto initStatus = init(opCtx); + if (!initStatus.isOK()) { + return initStatus; } - const auto& scopedCM = scopedCMStatus.getValue(); - _manager = scopedCM.cm(); - _primary = scopedCM.primary(); - // We now have the latest metadata from the cache. // @@ -710,8 +679,6 @@ Status ChunkManagerTargeter::refreshIfNeeded(OperationContext* txn, bool* wasCha // Either we couldn't target at all, or we have stale versions, but not both. // - dassert(!(_needsTargetingRefresh && !_remoteShardVersions.empty())); - if (_needsTargetingRefresh) { // Reset the field _needsTargetingRefresh = false; @@ -719,63 +686,44 @@ Status ChunkManagerTargeter::refreshIfNeeded(OperationContext* txn, bool* wasCha // If we couldn't target, we might need to refresh if we haven't remotely refreshed the // metadata since we last got it from the cache. 
- bool alreadyRefreshed = wasMetadataRefreshed(lastManager, lastPrimary, _manager, _primary); + bool alreadyRefreshed = wasMetadataRefreshed( + lastManager, lastPrimary, _routingInfo->cm(), _routingInfo->primary()); // If didn't already refresh the targeting information, refresh it if (!alreadyRefreshed) { // To match previous behavior, we just need an incremental refresh here - return refreshNow(txn, RefreshType_RefreshChunkManager); + return refreshNow(opCtx); } - *wasChanged = isMetadataDifferent(lastManager, lastPrimary, _manager, _primary); + *wasChanged = isMetadataDifferent( + lastManager, lastPrimary, _routingInfo->cm(), _routingInfo->primary()); return Status::OK(); } else if (!_remoteShardVersions.empty()) { // If we got stale shard versions from remote shards, we may need to refresh // NOTE: Not sure yet if this can happen simultaneously with targeting issues - CompareResult result = - compareAllShardVersions(_manager.get(), _primary.get(), _remoteShardVersions); + CompareResult result = compareAllShardVersions(*_routingInfo, _remoteShardVersions); + // Reset the versions _remoteShardVersions.clear(); - if (result == CompareResult_Unknown) { + if (result == CompareResult_Unknown || result == CompareResult_LT) { // Our current shard versions aren't all comparable to the old versions, maybe drop - return refreshNow(txn, RefreshType_ReloadDatabase); - } else if (result == CompareResult_LT) { - // Our current shard versions are less than the remote versions, but no drop - return refreshNow(txn, RefreshType_RefreshChunkManager); + return refreshNow(opCtx); } - *wasChanged = isMetadataDifferent(lastManager, lastPrimary, _manager, _primary); + *wasChanged = isMetadataDifferent( + lastManager, lastPrimary, _routingInfo->cm(), _routingInfo->primary()); return Status::OK(); } - // unreachable - dassert(false); - return Status::OK(); + MONGO_UNREACHABLE; } -Status ChunkManagerTargeter::refreshNow(OperationContext* txn, RefreshType refreshType) { - if (refreshType 
== RefreshType_ReloadDatabase) { - Grid::get(txn)->catalogCache()->invalidate(_nss.db().toString()); - } - - // Try not to spam the configs - refreshBackoff(); +Status ChunkManagerTargeter::refreshNow(OperationContext* opCtx) { + Grid::get(opCtx)->catalogCache()->onStaleConfigError(std::move(*_routingInfo)); - ScopedChunkManager::refreshAndGet(txn, _nss); - - auto scopedCMStatus = ScopedChunkManager::get(txn, _nss); - if (!scopedCMStatus.isOK()) { - return scopedCMStatus.getStatus(); - } - - const auto& scopedCM = scopedCMStatus.getValue(); - - _manager = scopedCM.cm(); - _primary = scopedCM.primary(); - - return Status::OK(); + return init(opCtx); } } // namespace mongo diff --git a/src/mongo/s/commands/chunk_manager_targeter.h b/src/mongo/s/commands/chunk_manager_targeter.h index 9c8f136dad8..1b845879144 100644 --- a/src/mongo/s/commands/chunk_manager_targeter.h +++ b/src/mongo/s/commands/chunk_manager_targeter.h @@ -34,12 +34,12 @@ #include "mongo/bson/bsonobj_comparator_interface.h" #include "mongo/bson/simple_bsonobj_comparator.h" #include "mongo/db/namespace_string.h" +#include "mongo/s/catalog_cache.h" #include "mongo/s/ns_targeter.h" namespace mongo { class ChunkManager; -class CollatorInterface; class OperationContext; class Shard; struct ChunkVersion; @@ -106,21 +106,12 @@ public: Status refreshIfNeeded(OperationContext* txn, bool* wasChanged); private: - // Different ways we can refresh metadata - enum RefreshType { - // The version has gone up, but the collection hasn't been dropped - RefreshType_RefreshChunkManager, - // The collection may have been dropped, so we need to reload the db - RefreshType_ReloadDatabase - }; - - typedef std::map<ShardId, ChunkVersion> ShardVersionMap; - + using ShardVersionMap = std::map<ShardId, ChunkVersion>; /** * Performs an actual refresh from the config server. 
*/ - Status refreshNow(OperationContext* txn, RefreshType refreshType); + Status refreshNow(OperationContext* opCtx); /** * Returns a vector of ShardEndpoints where a document might need to be placed. @@ -167,10 +158,8 @@ private: // Represents only the view and not really part of the targeter state. This is not owned here. TargeterStats* _stats; - // Zero or one of these are filled at all times - // If sharded, _manager, if unsharded, _primary, on error, neither - std::shared_ptr<ChunkManager> _manager; - std::shared_ptr<Shard> _primary; + // The latest loaded routing cache entry + boost::optional<CachedCollectionRoutingInfo> _routingInfo; // Map of shard->remote shard version reported from stale errors ShardVersionMap _remoteShardVersions; diff --git a/src/mongo/s/commands/cluster_aggregate.cpp b/src/mongo/s/commands/cluster_aggregate.cpp index 7c7bf6686dd..381d3a0d620 100644 --- a/src/mongo/s/commands/cluster_aggregate.cpp +++ b/src/mongo/s/commands/cluster_aggregate.cpp @@ -47,7 +47,7 @@ #include "mongo/db/views/view.h" #include "mongo/executor/task_executor_pool.h" #include "mongo/rpc/get_status_from_command_result.h" -#include "mongo/s/chunk_manager.h" +#include "mongo/s/catalog_cache.h" #include "mongo/s/client/shard_connection.h" #include "mongo/s/client/shard_registry.h" #include "mongo/s/commands/cluster_commands_common.h" @@ -55,30 +55,32 @@ #include "mongo/s/grid.h" #include "mongo/s/query/cluster_query_knobs.h" #include "mongo/s/query/store_possible_cursor.h" -#include "mongo/s/sharding_raii.h" #include "mongo/s/stale_exception.h" #include "mongo/util/log.h" namespace mongo { -Status ClusterAggregate::runAggregate(OperationContext* txn, +Status ClusterAggregate::runAggregate(OperationContext* opCtx, const Namespaces& namespaces, BSONObj cmdObj, int options, BSONObjBuilder* result) { - auto scopedShardDbStatus = ScopedShardDatabase::getExisting(txn, namespaces.executionNss.db()); - if (!scopedShardDbStatus.isOK()) { - appendEmptyResultSet( - *result, 
scopedShardDbStatus.getStatus(), namespaces.requestedNss.ns()); - return Status::OK(); - } - auto request = AggregationRequest::parseFromBSON(namespaces.executionNss, cmdObj); if (!request.isOK()) { return request.getStatus(); } - const auto conf = scopedShardDbStatus.getValue().db(); + auto const catalogCache = Grid::get(opCtx)->catalogCache(); + + auto executionNsRoutingInfoStatus = + catalogCache->getCollectionRoutingInfo(opCtx, namespaces.executionNss); + if (!executionNsRoutingInfoStatus.isOK()) { + appendEmptyResultSet( + *result, executionNsRoutingInfoStatus.getStatus(), namespaces.requestedNss.ns()); + return Status::OK(); + } + + const auto& executionNsRoutingInfo = executionNsRoutingInfoStatus.getValue(); // Determine the appropriate collation and 'resolve' involved namespaces to make the // ExpressionContext. @@ -90,27 +92,31 @@ Status ClusterAggregate::runAggregate(OperationContext* txn, // command on an unsharded collection. StringMap<ExpressionContext::ResolvedNamespace> resolvedNamespaces; LiteParsedPipeline liteParsedPipeline(request.getValue()); - for (auto&& ns : liteParsedPipeline.getInvolvedNamespaces()) { - uassert(28769, str::stream() << ns.ns() << " cannot be sharded", !conf->isSharded(ns.ns())); - resolvedNamespaces[ns.coll()] = {ns, std::vector<BSONObj>{}}; + for (auto&& nss : liteParsedPipeline.getInvolvedNamespaces()) { + const auto resolvedNsRoutingInfo = + uassertStatusOK(catalogCache->getCollectionRoutingInfo(opCtx, nss)); + uassert( + 28769, str::stream() << nss.ns() << " cannot be sharded", !resolvedNsRoutingInfo.cm()); + resolvedNamespaces.try_emplace(nss.coll(), nss, std::vector<BSONObj>{}); } - if (!conf->isSharded(namespaces.executionNss.ns())) { - return aggPassthrough(txn, namespaces, conf, cmdObj, result, options); + if (!executionNsRoutingInfo.cm()) { + return aggPassthrough( + opCtx, namespaces, executionNsRoutingInfo.primary()->getId(), cmdObj, result, options); } - auto chunkMgr = conf->getChunkManager(txn, 
namespaces.executionNss.ns()); + const auto chunkMgr = executionNsRoutingInfo.cm(); std::unique_ptr<CollatorInterface> collation; if (!request.getValue().getCollation().isEmpty()) { - collation = uassertStatusOK(CollatorFactoryInterface::get(txn->getServiceContext()) + collation = uassertStatusOK(CollatorFactoryInterface::get(opCtx->getServiceContext()) ->makeFromBSON(request.getValue().getCollation())); } else if (chunkMgr->getDefaultCollator()) { collation = chunkMgr->getDefaultCollator()->clone(); } boost::intrusive_ptr<ExpressionContext> mergeCtx = new ExpressionContext( - txn, request.getValue(), std::move(collation), std::move(resolvedNamespaces)); + opCtx, request.getValue(), std::move(collation), std::move(resolvedNamespaces)); mergeCtx->inRouter = true; // explicitly *not* setting mergeCtx->tempDir @@ -127,7 +133,7 @@ Status ClusterAggregate::runAggregate(OperationContext* txn, const bool singleShard = [&]() { BSONObj firstMatchQuery = pipeline.getValue()->getInitialQuery(); BSONObj shardKeyMatches = uassertStatusOK( - chunkMgr->getShardKeyPattern().extractShardKeyFromQuery(txn, firstMatchQuery)); + chunkMgr->getShardKeyPattern().extractShardKeyFromQuery(opCtx, firstMatchQuery)); if (shardKeyMatches.isEmpty()) { return false; @@ -176,7 +182,7 @@ Status ClusterAggregate::runAggregate(OperationContext* txn, // Run the command on the shards // TODO need to make sure cursors are killed if a retry is needed std::vector<Strategy::CommandResult> shardResults; - Strategy::commandOp(txn, + Strategy::commandOp(opCtx, namespaces.executionNss.db().toString(), shardedCommand, options, @@ -210,13 +216,13 @@ Status ClusterAggregate::runAggregate(OperationContext* txn, if (!needSplit) { invariant(shardResults.size() == 1); invariant(shardResults[0].target.getServers().size() == 1); - auto executorPool = Grid::get(txn)->getExecutorPool(); + auto executorPool = Grid::get(opCtx)->getExecutorPool(); const BSONObj reply = 
uassertStatusOK(storePossibleCursor(shardResults[0].target.getServers()[0], shardResults[0].result, namespaces.requestedNss, executorPool->getArbitraryExecutor(), - Grid::get(txn)->getCursorManager())); + Grid::get(opCtx)->getCursorManager())); result->appendElements(reply); return getStatusFromCommandResult(reply); } @@ -257,16 +263,18 @@ Status ClusterAggregate::runAggregate(OperationContext* txn, // Run merging command on random shard, unless a stage needs the primary shard. Need to use // ShardConnection so that the merging mongod is sent the config servers on connection init. - auto& prng = txn->getClient()->getPrng(); - const auto& mergingShardId = (needPrimaryShardMerger || internalQueryAlwaysMergeOnPrimaryShard) - ? conf->getPrimaryId() + auto& prng = opCtx->getClient()->getPrng(); + const auto mergingShardId = + (needPrimaryShardMerger || internalQueryAlwaysMergeOnPrimaryShard.load()) + ? uassertStatusOK(catalogCache->getDatabase(opCtx, namespaces.executionNss.db())) + .primaryId() : shardResults[prng.nextInt32(shardResults.size())].shardTargetId; const auto mergingShard = - uassertStatusOK(Grid::get(txn)->shardRegistry()->getShard(txn, mergingShardId)); + uassertStatusOK(Grid::get(opCtx)->shardRegistry()->getShard(opCtx, mergingShardId)); ShardConnection conn(mergingShard->getConnString(), outputNsOrEmpty); BSONObj mergedResults = - aggRunCommand(txn, conn.get(), namespaces, mergeCmd.freeze().toBson(), options); + aggRunCommand(opCtx, conn.get(), namespaces, mergeCmd.freeze().toBson(), options); conn.done(); if (auto wcErrorElem = mergedResults["writeConcernError"]) { @@ -383,7 +391,7 @@ void ClusterAggregate::killAllCursors(const std::vector<Strategy::CommandResult> } } -BSONObj ClusterAggregate::aggRunCommand(OperationContext* txn, +BSONObj ClusterAggregate::aggRunCommand(OperationContext* opCtx, DBClientBase* conn, const Namespaces& namespaces, BSONObj cmd, @@ -411,29 +419,29 @@ BSONObj ClusterAggregate::aggRunCommand(OperationContext* txn, throw 
RecvStaleConfigException("command failed because of stale config", result); } - auto executorPool = Grid::get(txn)->getExecutorPool(); + auto executorPool = Grid::get(opCtx)->getExecutorPool(); result = uassertStatusOK(storePossibleCursor(HostAndPort(cursor->originalHost()), result, namespaces.requestedNss, executorPool->getArbitraryExecutor(), - Grid::get(txn)->getCursorManager())); + Grid::get(opCtx)->getCursorManager())); return result; } -Status ClusterAggregate::aggPassthrough(OperationContext* txn, +Status ClusterAggregate::aggPassthrough(OperationContext* opCtx, const Namespaces& namespaces, - DBConfig* conf, + const ShardId& shardId, BSONObj cmdObj, BSONObjBuilder* out, int queryOptions) { // Temporary hack. See comment on declaration for details. - auto shardStatus = Grid::get(txn)->shardRegistry()->getShard(txn, conf->getPrimaryId()); + auto shardStatus = Grid::get(opCtx)->shardRegistry()->getShard(opCtx, shardId); if (!shardStatus.isOK()) { return shardStatus.getStatus(); } ShardConnection conn(shardStatus.getValue()->getConnString(), ""); - BSONObj result = aggRunCommand(txn, conn.get(), namespaces, cmdObj, queryOptions); + BSONObj result = aggRunCommand(opCtx, conn.get(), namespaces, cmdObj, queryOptions); conn.done(); // First append the properly constructed writeConcernError. 
It will then be skipped @@ -469,7 +477,8 @@ Status ClusterAggregate::aggPassthrough(OperationContext* txn, Namespaces nsStruct; nsStruct.requestedNss = namespaces.requestedNss; nsStruct.executionNss = resolvedView.getNamespace(); - return ClusterAggregate::runAggregate(txn, nsStruct, aggCmd.getValue(), queryOptions, out); + return ClusterAggregate::runAggregate( + opCtx, nsStruct, aggCmd.getValue(), queryOptions, out); } return getStatusFromCommandResult(result); diff --git a/src/mongo/s/commands/cluster_aggregate.h b/src/mongo/s/commands/cluster_aggregate.h index da8e7bb46bc..301ae217a0e 100644 --- a/src/mongo/s/commands/cluster_aggregate.h +++ b/src/mongo/s/commands/cluster_aggregate.h @@ -37,11 +37,11 @@ #include "mongo/db/pipeline/document_source.h" #include "mongo/db/pipeline/document_source_merge_cursors.h" #include "mongo/s/commands/strategy.h" -#include "mongo/s/config.h" namespace mongo { class OperationContext; +class ShardId; /** * Methods for running aggregation across a sharded cluster. 
@@ -90,7 +90,7 @@ private: static Status aggPassthrough(OperationContext* txn, const Namespaces& namespaces, - DBConfig* conf, + const ShardId& shardId, BSONObj cmd, BSONObjBuilder* result, int queryOptions); diff --git a/src/mongo/s/commands/cluster_commands_common.cpp b/src/mongo/s/commands/cluster_commands_common.cpp index b4035da68c9..225cef2b6ef 100644 --- a/src/mongo/s/commands/cluster_commands_common.cpp +++ b/src/mongo/s/commands/cluster_commands_common.cpp @@ -40,7 +40,6 @@ #include "mongo/s/client/shard_connection.h" #include "mongo/s/client/version_manager.h" #include "mongo/s/grid.h" -#include "mongo/s/sharding_raii.h" #include "mongo/s/stale_exception.h" #include "mongo/util/log.h" @@ -51,20 +50,24 @@ using std::string; namespace { -bool forceRemoteCheckShardVersionCB(OperationContext* txn, const string& ns) { +bool forceRemoteCheckShardVersionCB(OperationContext* opCtx, const string& ns) { const NamespaceString nss(ns); + if (!nss.isValid()) { + return false; + } + // This will force the database catalog entry to be reloaded - Grid::get(txn)->catalogCache()->invalidate(nss.db()); + Grid::get(opCtx)->catalogCache()->invalidateShardedCollection(nss); - auto scopedCMStatus = ScopedChunkManager::get(txn, nss); - if (!scopedCMStatus.isOK()) { + auto routingInfoStatus = Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfo(opCtx, nss); + if (!routingInfoStatus.isOK()) { return false; } - const auto& scopedCM = scopedCMStatus.getValue(); + auto& routingInfo = routingInfoStatus.getValue(); - return scopedCM.cm() != nullptr; + return routingInfo.cm() != nullptr; } } // namespace @@ -113,7 +116,7 @@ void Future::CommandResult::init() { } } -bool Future::CommandResult::join(OperationContext* txn, int maxRetries) { +bool Future::CommandResult::join(OperationContext* opCtx, int maxRetries) { if (_done) { return _ok; } @@ -155,7 +158,7 @@ bool Future::CommandResult::join(OperationContext* txn, int maxRetries) { } if (i >= maxRetries / 2) { - if 
(!forceRemoteCheckShardVersionCB(txn, staleNS)) { + if (!forceRemoteCheckShardVersionCB(opCtx, staleNS)) { error() << "Future::spawnCommand (part 2) no config detected" << causedBy(redact(e)); throw e; @@ -169,7 +172,7 @@ bool Future::CommandResult::join(OperationContext* txn, int maxRetries) { << "for lazy command " << redact(_cmd) << ", could not refresh " << staleNS; } else { - versionManager.checkShardVersionCB(txn, _conn, staleNS, false, 1); + versionManager.checkShardVersionCB(opCtx, _conn, staleNS, false, 1); } LOG(i > 1 ? 0 : 1) << "retrying lazy command" << causedBy(redact(e)); @@ -242,13 +245,13 @@ bool appendEmptyResultSet(BSONObjBuilder& result, Status status, const std::stri return Command::appendCommandStatus(result, status); } -std::vector<NamespaceString> getAllShardedCollectionsForDb(OperationContext* txn, +std::vector<NamespaceString> getAllShardedCollectionsForDb(OperationContext* opCtx, StringData dbName) { const auto dbNameStr = dbName.toString(); std::vector<CollectionType> collectionsOnConfig; - uassertStatusOK(Grid::get(txn)->catalogClient(txn)->getCollections( - txn, &dbNameStr, &collectionsOnConfig, nullptr)); + uassertStatusOK(Grid::get(opCtx)->catalogClient(opCtx)->getCollections( + opCtx, &dbNameStr, &collectionsOnConfig, nullptr)); std::vector<NamespaceString> collectionsToReturn; for (const auto& coll : collectionsOnConfig) { @@ -261,4 +264,36 @@ std::vector<NamespaceString> getAllShardedCollectionsForDb(OperationContext* txn return collectionsToReturn; } +CachedCollectionRoutingInfo getShardedCollection(OperationContext* opCtx, + const NamespaceString& nss) { + auto routingInfo = + uassertStatusOK(Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfo(opCtx, nss)); + uassert(ErrorCodes::NamespaceNotSharded, + str::stream() << "Collection " << nss.ns() << " is not sharded.", + routingInfo.cm()); + + return routingInfo; +} + +StatusWith<CachedDatabaseInfo> createShardDatabase(OperationContext* opCtx, StringData dbName) { + auto 
dbStatus = Grid::get(opCtx)->catalogCache()->getDatabase(opCtx, dbName); + if (dbStatus == ErrorCodes::NamespaceNotFound) { + auto createDbStatus = + Grid::get(opCtx)->catalogClient(opCtx)->createDatabase(opCtx, dbName.toString()); + if (createDbStatus.isOK() || createDbStatus == ErrorCodes::NamespaceExists) { + dbStatus = Grid::get(opCtx)->catalogCache()->getDatabase(opCtx, dbName); + } else { + dbStatus = createDbStatus; + } + } + + if (dbStatus.isOK()) { + return dbStatus; + } + + return {dbStatus.getStatus().code(), + str::stream() << "Database " << dbName << " not found due to " + << dbStatus.getStatus().reason()}; +} + } // namespace mongo diff --git a/src/mongo/s/commands/cluster_commands_common.h b/src/mongo/s/commands/cluster_commands_common.h index 819fd8738ea..0f3b76aead2 100644 --- a/src/mongo/s/commands/cluster_commands_common.h +++ b/src/mongo/s/commands/cluster_commands_common.h @@ -39,6 +39,8 @@ namespace mongo { class AScopedConnection; +class CachedCollectionRoutingInfo; +class CachedDatabaseInfo; class DBClientBase; class DBClientCursor; class OperationContext; @@ -140,4 +142,17 @@ bool appendEmptyResultSet(BSONObjBuilder& result, Status status, const std::stri std::vector<NamespaceString> getAllShardedCollectionsForDb(OperationContext* txn, StringData dbName); +/** + * Abstracts the common pattern of refreshing a collection and checking if it is sharded used across + * multiple commands. + */ +CachedCollectionRoutingInfo getShardedCollection(OperationContext* opCtx, + const NamespaceString& nss); + +/** + * If the specified database exists already, loads it in the cache (if not already there) and + * returns it. Otherwise, if it does not exist, this call will implicitly create it as non-sharded. 
+ */ +StatusWith<CachedDatabaseInfo> createShardDatabase(OperationContext* opCtx, StringData dbName); + } // namespace mongo diff --git a/src/mongo/s/commands/cluster_compact_cmd.cpp b/src/mongo/s/commands/cluster_compact_cmd.cpp new file mode 100644 index 00000000000..83c7f1c5990 --- /dev/null +++ b/src/mongo/s/commands/cluster_compact_cmd.cpp @@ -0,0 +1,72 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/commands.h" + +namespace mongo { +namespace { + +class CompactCmd : public Command { +public: + CompactCmd() : Command("compact") {} + + bool slaveOk() const override { + return true; + } + + bool adminOnly() const override { + return false; + } + + void addRequiredPrivileges(const std::string& dbname, + const BSONObj& cmdObj, + std::vector<Privilege>* out) override { + ActionSet actions; + actions.addAction(ActionType::compact); + out->push_back(Privilege(parseResourcePattern(dbname, cmdObj), actions)); + } + + bool supportsWriteConcern(const BSONObj& cmd) const override { + return false; + } + + bool run(OperationContext* opCtx, + const std::string& dbName, + BSONObj& cmdObj, + int options, + std::string& errmsg, + BSONObjBuilder& result) override { + uasserted(ErrorCodes::CommandNotSupported, "compact not allowed through mongos"); + } + +} compactCmd; + +} // namespace +} // namespace mongo diff --git a/src/mongo/s/commands/cluster_count_cmd.cpp b/src/mongo/s/commands/cluster_count_cmd.cpp index ec2798bb62e..45c46c26185 100644 --- a/src/mongo/s/commands/cluster_count_cmd.cpp +++ b/src/mongo/s/commands/cluster_count_cmd.cpp @@ -42,73 +42,42 @@ #include "mongo/util/timer.h" namespace mongo { - -using std::string; -using std::vector; - namespace { -long long applySkipLimit(long long num, const BSONObj& cmd) { - BSONElement s = cmd["skip"]; - BSONElement l = cmd["limit"]; - - if (s.isNumber()) { - num = num - s.numberLong(); - if (num < 0) { - num = 0; - } - } - - if (l.isNumber()) { - long long limit = l.numberLong(); - if (limit < 0) { - limit = -limit; - } - - // 0 limit means no limit - if (limit < num && limit != 0) { - num = limit; - } - } - - return num; -} - - class ClusterCountCmd : public Command { public: ClusterCountCmd() : Command("count", false) {} - virtual bool slaveOk() const { + bool slaveOk() const override { return true; } - virtual bool adminOnly() const { + bool adminOnly() const 
override { return false; } - - virtual bool supportsWriteConcern(const BSONObj& cmd) const override { + bool supportsWriteConcern(const BSONObj& cmd) const override { return false; } - virtual void addRequiredPrivileges(const std::string& dbname, - const BSONObj& cmdObj, - std::vector<Privilege>* out) { + void addRequiredPrivileges(const std::string& dbname, + const BSONObj& cmdObj, + std::vector<Privilege>* out) override { ActionSet actions; actions.addAction(ActionType::find); out->push_back(Privilege(parseResourcePattern(dbname, cmdObj), actions)); } - virtual bool run(OperationContext* txn, - const std::string& dbname, - BSONObj& cmdObj, - int options, - std::string& errmsg, - BSONObjBuilder& result) { + bool run(OperationContext* opCtx, + const std::string& dbname, + BSONObj& cmdObj, + int options, + std::string& errmsg, + BSONObjBuilder& result) override { const NamespaceString nss(parseNs(dbname, cmdObj)); - uassert( - ErrorCodes::InvalidNamespace, "count command requires valid namespace", nss.isValid()); + uassert(ErrorCodes::InvalidNamespace, + str::stream() << "Invalid namespace specified '" << nss.ns() << "'", + nss.isValid()); long long skip = 0; @@ -167,8 +136,8 @@ public: } } - vector<Strategy::CommandResult> countResult; - Strategy::commandOp(txn, + std::vector<Strategy::CommandResult> countResult; + Strategy::commandOp(opCtx, dbname, countCmdBuilder.done(), options, @@ -198,7 +167,7 @@ public: BSONObjBuilder aggResult; Command::findCommand("aggregate") - ->run(txn, dbname, aggCmd.getValue(), options, errmsg, aggResult); + ->run(opCtx, dbname, aggCmd.getValue(), options, errmsg, aggResult); result.resetToEmpty(); ViewResponseFormatter formatter(aggResult.obj()); @@ -214,20 +183,19 @@ public: long long total = 0; BSONObjBuilder shardSubTotal(result.subobjStart("shards")); - for (vector<Strategy::CommandResult>::const_iterator iter = countResult.begin(); - iter != countResult.end(); - ++iter) { - const ShardId& shardName = iter->shardTargetId; + for 
(const auto& resultEntry : countResult) { + const ShardId& shardName = resultEntry.shardTargetId; + const auto resultBSON = resultEntry.result; - if (iter->result["ok"].trueValue()) { - long long shardCount = iter->result["n"].numberLong(); + if (resultBSON["ok"].trueValue()) { + long long shardCount = resultBSON["n"].numberLong(); shardSubTotal.appendNumber(shardName.toString(), shardCount); total += shardCount; } else { shardSubTotal.doneFast(); errmsg = "failed on : " + shardName.toString(); - result.append("cause", iter->result); + result.append("cause", resultBSON); // Add "code" to the top-level response, if the failure of the sharded command // can be accounted to a single error @@ -247,17 +215,16 @@ public: return true; } - virtual Status explain(OperationContext* txn, - const std::string& dbname, - const BSONObj& cmdObj, - ExplainCommon::Verbosity verbosity, - const rpc::ServerSelectionMetadata& serverSelectionMetadata, - BSONObjBuilder* out) const { + Status explain(OperationContext* opCtx, + const std::string& dbname, + const BSONObj& cmdObj, + ExplainCommon::Verbosity verbosity, + const rpc::ServerSelectionMetadata& serverSelectionMetadata, + BSONObjBuilder* out) const override { const NamespaceString nss(parseNs(dbname, cmdObj)); - if (!nss.isValid()) { - return Status{ErrorCodes::InvalidNamespace, - str::stream() << "Invalid collection name: " << nss.ns()}; - } + uassert(ErrorCodes::InvalidNamespace, + str::stream() << "Invalid namespace specified '" << nss.ns() << "'", + nss.isValid()); // Extract the targeting query. 
BSONObj targetingQuery; @@ -284,8 +251,8 @@ public: // We will time how long it takes to run the commands on the shards Timer timer; - vector<Strategy::CommandResult> shardResults; - Strategy::commandOp(txn, + std::vector<Strategy::CommandResult> shardResults; + Strategy::commandOp(opCtx, dbname, explainCmdBob.obj(), options, @@ -316,7 +283,7 @@ public: std::string errMsg; if (Command::findCommand("aggregate") - ->run(txn, dbname, aggCmd.getValue(), 0, errMsg, *out)) { + ->run(opCtx, dbname, aggCmd.getValue(), 0, errMsg, *out)) { return Status::OK(); } @@ -326,7 +293,34 @@ public: const char* mongosStageName = ClusterExplain::getStageNameForReadOp(shardResults, cmdObj); return ClusterExplain::buildExplainResult( - txn, shardResults, mongosStageName, millisElapsed, out); + opCtx, shardResults, mongosStageName, millisElapsed, out); + } + +private: + static long long applySkipLimit(long long num, const BSONObj& cmd) { + BSONElement s = cmd["skip"]; + BSONElement l = cmd["limit"]; + + if (s.isNumber()) { + num = num - s.numberLong(); + if (num < 0) { + num = 0; + } + } + + if (l.isNumber()) { + long long limit = l.numberLong(); + if (limit < 0) { + limit = -limit; + } + + // 0 limit means no limit + if (limit < num && limit != 0) { + num = limit; + } + } + + return num; } } clusterCountCmd; diff --git a/src/mongo/s/commands/cluster_drop_cmd.cpp b/src/mongo/s/commands/cluster_drop_cmd.cpp index 7f611f543f8..d5b1a7fa45d 100644 --- a/src/mongo/s/commands/cluster_drop_cmd.cpp +++ b/src/mongo/s/commands/cluster_drop_cmd.cpp @@ -41,7 +41,6 @@ #include "mongo/s/commands/cluster_commands_common.h" #include "mongo/s/commands/sharded_command_processing.h" #include "mongo/s/grid.h" -#include "mongo/s/sharding_raii.h" #include "mongo/s/stale_exception.h" #include "mongo/util/log.h" @@ -72,7 +71,7 @@ public: out->push_back(Privilege(parseResourcePattern(dbname, cmdObj), actions)); } - bool run(OperationContext* txn, + bool run(OperationContext* opCtx, const std::string& dbname, 
BSONObj& cmdObj, int options, @@ -80,20 +79,19 @@ public: BSONObjBuilder& result) override { const NamespaceString nss(parseNsCollectionRequired(dbname, cmdObj)); - auto scopedDbStatus = ScopedShardDatabase::getExisting(txn, dbname); - if (scopedDbStatus == ErrorCodes::NamespaceNotFound) { + auto const catalogCache = Grid::get(opCtx)->catalogCache(); + + auto routingInfoStatus = catalogCache->getCollectionRoutingInfo(opCtx, nss); + if (routingInfoStatus == ErrorCodes::NamespaceNotFound) { return true; } - uassertStatusOK(scopedDbStatus.getStatus()); - - auto const db = scopedDbStatus.getValue().db(); - - if (!db->isSharded(nss.ns())) { - _dropUnshardedCollectionFromShard(txn, db->getPrimaryId(), nss, &result); + auto routingInfo = uassertStatusOK(std::move(routingInfoStatus)); + if (!routingInfo.cm()) { + _dropUnshardedCollectionFromShard(opCtx, routingInfo.primaryId(), nss, &result); } else { - uassertStatusOK(Grid::get(txn)->catalogClient(txn)->dropCollection(txn, nss)); - db->markNSNotSharded(nss.ns()); + uassertStatusOK(Grid::get(opCtx)->catalogClient(opCtx)->dropCollection(opCtx, nss)); + catalogCache->invalidateShardedCollection(nss); } return true; @@ -104,13 +102,13 @@ private: * Sends the 'drop' command for the specified collection to the specified shard. Throws * DBException on failure. 
*/ - static void _dropUnshardedCollectionFromShard(OperationContext* txn, + static void _dropUnshardedCollectionFromShard(OperationContext* opCtx, const ShardId& shardId, const NamespaceString& nss, BSONObjBuilder* result) { - const auto shardRegistry = Grid::get(txn)->shardRegistry(); + const auto shardRegistry = Grid::get(opCtx)->shardRegistry(); - const auto dropCommandBSON = [shardRegistry, txn, &shardId, &nss] { + const auto dropCommandBSON = [shardRegistry, opCtx, &shardId, &nss] { BSONObjBuilder builder; builder.append("drop", nss.coll()); @@ -121,17 +119,17 @@ private: ChunkVersion::UNSHARDED().appendForCommands(&builder); } - if (!txn->getWriteConcern().usedDefault) { + if (!opCtx->getWriteConcern().usedDefault) { builder.append(WriteConcernOptions::kWriteConcernField, - txn->getWriteConcern().toBSON()); + opCtx->getWriteConcern().toBSON()); } return builder.obj(); }(); - const auto shard = uassertStatusOK(shardRegistry->getShard(txn, shardId)); + const auto shard = uassertStatusOK(shardRegistry->getShard(opCtx, shardId)); auto cmdDropResult = uassertStatusOK(shard->runCommandWithFixedRetryAttempts( - txn, + opCtx, ReadPreferenceSetting{ReadPreference::PrimaryOnly}, nss.db().toString(), dropCommandBSON, diff --git a/src/mongo/s/commands/cluster_drop_database_cmd.cpp b/src/mongo/s/commands/cluster_drop_database_cmd.cpp index f227b4aa89a..178fc5f36bc 100644 --- a/src/mongo/s/commands/cluster_drop_database_cmd.cpp +++ b/src/mongo/s/commands/cluster_drop_database_cmd.cpp @@ -40,9 +40,7 @@ #include "mongo/s/client/shard_registry.h" #include "mongo/s/commands/cluster_commands_common.h" #include "mongo/s/commands/sharded_command_processing.h" -#include "mongo/s/config.h" #include "mongo/s/grid.h" -#include "mongo/s/sharding_raii.h" #include "mongo/util/log.h" namespace mongo { @@ -72,7 +70,7 @@ public: out->push_back(Privilege(ResourcePattern::forDatabaseName(dbname), actions)); } - bool run(OperationContext* txn, + bool run(OperationContext* opCtx, const 
std::string& dbname, BSONObj& cmdObj, int options, @@ -86,55 +84,56 @@ public: "have to pass 1 as db parameter", cmdObj.firstElement().isNumber() && cmdObj.firstElement().number() == 1); - auto const catalogClient = Grid::get(txn)->catalogClient(txn); + auto const catalogClient = Grid::get(opCtx)->catalogClient(opCtx); // Lock the database globally to prevent conflicts with simultaneous database // creation/modification. auto scopedDistLock = uassertStatusOK(catalogClient->getDistLockManager()->lock( - txn, dbname, "dropDatabase", DistLockManager::kDefaultLockTimeout)); + opCtx, dbname, "dropDatabase", DistLockManager::kDefaultLockTimeout)); + + auto const catalogCache = Grid::get(opCtx)->catalogCache(); // Refresh the database metadata so it kicks off a full reload - Grid::get(txn)->catalogCache()->invalidate(dbname); + catalogCache->purgeDatabase(dbname); - auto scopedDbStatus = ScopedShardDatabase::getExisting(txn, dbname); + auto dbInfoStatus = catalogCache->getDatabase(opCtx, dbname); - if (scopedDbStatus == ErrorCodes::NamespaceNotFound) { + if (dbInfoStatus == ErrorCodes::NamespaceNotFound) { result.append("info", "database does not exist"); return true; } - uassertStatusOK(scopedDbStatus.getStatus()); + uassertStatusOK(dbInfoStatus.getStatus()); - catalogClient->logChange(txn, + catalogClient->logChange(opCtx, "dropDatabase.start", dbname, BSONObj(), ShardingCatalogClient::kMajorityWriteConcern); - auto const db = scopedDbStatus.getValue().db(); + auto& dbInfo = dbInfoStatus.getValue(); // Drop the database's collections from metadata - for (const auto& nss : getAllShardedCollectionsForDb(txn, dbname)) { - uassertStatusOK(catalogClient->dropCollection(txn, nss)); - db->markNSNotSharded(nss.ns()); + for (const auto& nss : getAllShardedCollectionsForDb(opCtx, dbname)) { + uassertStatusOK(catalogClient->dropCollection(opCtx, nss)); } // Drop the database from the primary shard first - _dropDatabaseFromShard(txn, db->getPrimaryId(), dbname); + 
_dropDatabaseFromShard(opCtx, dbInfo.primaryId(), dbname); // Drop the database from each of the remaining shards { std::vector<ShardId> allShardIds; - Grid::get(txn)->shardRegistry()->getAllShardIds(&allShardIds); + Grid::get(opCtx)->shardRegistry()->getAllShardIds(&allShardIds); for (const ShardId& shardId : allShardIds) { - _dropDatabaseFromShard(txn, shardId, dbname); + _dropDatabaseFromShard(opCtx, shardId, dbname); } } // Remove the database entry from the metadata Status status = - catalogClient->removeConfigDocuments(txn, + catalogClient->removeConfigDocuments(opCtx, DatabaseType::ConfigNS, BSON(DatabaseType::name(dbname)), ShardingCatalogClient::kMajorityWriteConcern); @@ -146,10 +145,10 @@ public: } // Invalidate the database so the next access will do a full reload - Grid::get(txn)->catalogCache()->invalidate(dbname); + catalogCache->purgeDatabase(dbname); catalogClient->logChange( - txn, "dropDatabase", dbname, BSONObj(), ShardingCatalogClient::kMajorityWriteConcern); + opCtx, "dropDatabase", dbname, BSONObj(), ShardingCatalogClient::kMajorityWriteConcern); result.append("dropped", dbname); return true; @@ -160,24 +159,25 @@ private: * Sends the 'dropDatabase' command for the specified database to the specified shard. Throws * DBException on failure. 
*/ - static void _dropDatabaseFromShard(OperationContext* txn, + static void _dropDatabaseFromShard(OperationContext* opCtx, const ShardId& shardId, const std::string& dbName) { - const auto dropDatabaseCommandBSON = [txn, &dbName] { + const auto dropDatabaseCommandBSON = [opCtx, &dbName] { BSONObjBuilder builder; builder.append("dropDatabase", 1); - if (!txn->getWriteConcern().usedDefault) { + if (!opCtx->getWriteConcern().usedDefault) { builder.append(WriteConcernOptions::kWriteConcernField, - txn->getWriteConcern().toBSON()); + opCtx->getWriteConcern().toBSON()); } return builder.obj(); }(); - const auto shard = uassertStatusOK(Grid::get(txn)->shardRegistry()->getShard(txn, shardId)); + const auto shard = + uassertStatusOK(Grid::get(opCtx)->shardRegistry()->getShard(opCtx, shardId)); auto cmdDropDatabaseResult = uassertStatusOK(shard->runCommandWithFixedRetryAttempts( - txn, + opCtx, ReadPreferenceSetting{ReadPreference::PrimaryOnly}, dbName, dropDatabaseCommandBSON, diff --git a/src/mongo/s/commands/cluster_enable_sharding_cmd.cpp b/src/mongo/s/commands/cluster_enable_sharding_cmd.cpp index 555fdfb0349..0f4688efb2a 100644 --- a/src/mongo/s/commands/cluster_enable_sharding_cmd.cpp +++ b/src/mongo/s/commands/cluster_enable_sharding_cmd.cpp @@ -41,7 +41,6 @@ #include "mongo/db/commands.h" #include "mongo/s/catalog/sharding_catalog_client.h" #include "mongo/s/catalog_cache.h" -#include "mongo/s/config.h" #include "mongo/s/grid.h" #include "mongo/util/log.h" @@ -87,7 +86,7 @@ public: return cmdObj.firstElement().str(); } - virtual bool run(OperationContext* txn, + virtual bool run(OperationContext* opCtx, const std::string& dbname_unused, BSONObj& cmdObj, int options, @@ -100,16 +99,17 @@ public: return false; } - if (dbname == "admin" || dbname == "config" || dbname == "local") { + if (dbname == NamespaceString::kAdminDb || dbname == NamespaceString::kConfigDb || + dbname == NamespaceString::kLocalDb) { errmsg = "can't shard " + dbname + " database"; return false; 
} - uassertStatusOK(Grid::get(txn)->catalogClient(txn)->enableSharding(txn, dbname)); + uassertStatusOK(Grid::get(opCtx)->catalogClient(opCtx)->enableSharding(opCtx, dbname)); audit::logEnableSharding(Client::getCurrent(), dbname); // Make sure to force update of any stale metadata - Grid::get(txn)->catalogCache()->invalidate(dbname); + Grid::get(opCtx)->catalogCache()->purgeDatabase(dbname); return true; } diff --git a/src/mongo/s/commands/cluster_find_and_modify_cmd.cpp b/src/mongo/s/commands/cluster_find_and_modify_cmd.cpp index eabb8661051..feae1fab5e2 100644 --- a/src/mongo/s/commands/cluster_find_and_modify_cmd.cpp +++ b/src/mongo/s/commands/cluster_find_and_modify_cmd.cpp @@ -40,7 +40,6 @@ #include "mongo/db/query/collation/collator_factory_interface.h" #include "mongo/s/balancer_configuration.h" #include "mongo/s/catalog_cache.h" -#include "mongo/s/chunk_manager.h" #include "mongo/s/client/shard_connection.h" #include "mongo/s/client/shard_registry.h" #include "mongo/s/commands/cluster_explain.h" @@ -48,8 +47,6 @@ #include "mongo/s/commands/sharded_command_processing.h" #include "mongo/s/commands/strategy.h" #include "mongo/s/grid.h" -#include "mongo/s/mongos_options.h" -#include "mongo/s/sharding_raii.h" #include "mongo/s/stale_exception.h" #include "mongo/util/timer.h" @@ -64,47 +61,42 @@ class FindAndModifyCmd : public Command { public: FindAndModifyCmd() : Command("findAndModify", false, "findandmodify") {} - virtual bool slaveOk() const { + bool slaveOk() const override { return true; } - virtual bool adminOnly() const { + bool adminOnly() const override { return false; } - - virtual bool supportsWriteConcern(const BSONObj& cmd) const override { + bool supportsWriteConcern(const BSONObj& cmd) const override { return true; } - virtual void addRequiredPrivileges(const std::string& dbname, - const BSONObj& cmdObj, - std::vector<Privilege>* out) { + void addRequiredPrivileges(const std::string& dbname, + const BSONObj& cmdObj, + std::vector<Privilege>* 
out) override { find_and_modify::addPrivilegesRequiredForFindAndModify(this, dbname, cmdObj, out); } - virtual Status explain(OperationContext* txn, - const std::string& dbName, - const BSONObj& cmdObj, - ExplainCommon::Verbosity verbosity, - const rpc::ServerSelectionMetadata& serverSelectionMetadata, - BSONObjBuilder* out) const { - const NamespaceString nss = parseNsCollectionRequired(dbName, cmdObj); + Status explain(OperationContext* opCtx, + const std::string& dbName, + const BSONObj& cmdObj, + ExplainCommon::Verbosity verbosity, + const rpc::ServerSelectionMetadata& serverSelectionMetadata, + BSONObjBuilder* out) const override { + const NamespaceString nss(parseNsCollectionRequired(dbName, cmdObj)); - auto scopedDB = uassertStatusOK(ScopedShardDatabase::getExisting(txn, dbName)); - const auto conf = scopedDB.db(); + auto routingInfo = + uassertStatusOK(Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfo(opCtx, nss)); shared_ptr<ChunkManager> chunkMgr; shared_ptr<Shard> shard; - if (!conf->isSharded(nss.ns())) { - auto shardStatus = Grid::get(txn)->shardRegistry()->getShard(txn, conf->getPrimaryId()); - if (!shardStatus.isOK()) { - return shardStatus.getStatus(); - } - shard = shardStatus.getValue(); + if (!routingInfo.cm()) { + shard = routingInfo.primary(); } else { - chunkMgr = _getChunkManager(txn, conf, nss); + chunkMgr = routingInfo.cm(); const BSONObj query = cmdObj.getObjectField("query"); @@ -118,7 +110,7 @@ public: return collationElementStatus; } - StatusWith<BSONObj> status = _getShardKey(txn, chunkMgr, query); + StatusWith<BSONObj> status = _getShardKey(opCtx, *chunkMgr, query); if (!status.isOK()) { return status.getStatus(); } @@ -126,10 +118,12 @@ public: BSONObj shardKey = status.getValue(); auto chunk = chunkMgr->findIntersectingChunk(shardKey, collation); - auto shardStatus = Grid::get(txn)->shardRegistry()->getShard(txn, chunk->getShardId()); + auto shardStatus = + Grid::get(opCtx)->shardRegistry()->getShard(opCtx, 
chunk->getShardId()); if (!shardStatus.isOK()) { return shardStatus.getStatus(); } + shard = shardStatus.getValue(); } @@ -142,7 +136,7 @@ public: Timer timer; BSONObjBuilder result; - bool ok = _runCommand(txn, conf, chunkMgr, shard->getId(), nss, explainCmd.obj(), result); + bool ok = _runCommand(opCtx, chunkMgr, shard->getId(), nss, explainCmd.obj(), result); long long millisElapsed = timer.millis(); if (!ok) { @@ -160,27 +154,26 @@ public: shardResults.push_back(cmdResult); return ClusterExplain::buildExplainResult( - txn, shardResults, ClusterExplain::kSingleShard, millisElapsed, out); + opCtx, shardResults, ClusterExplain::kSingleShard, millisElapsed, out); } - virtual bool run(OperationContext* txn, - const std::string& dbName, - BSONObj& cmdObj, - int options, - std::string& errmsg, - BSONObjBuilder& result) { + bool run(OperationContext* opCtx, + const std::string& dbName, + BSONObj& cmdObj, + int options, + std::string& errmsg, + BSONObjBuilder& result) override { const NamespaceString nss = parseNsCollectionRequired(dbName, cmdObj); // findAndModify should only be creating database if upsert is true, but this would require // that the parsing be pulled into this function. 
- auto scopedDb = uassertStatusOK(ScopedShardDatabase::getOrCreate(txn, dbName)); - const auto conf = scopedDb.db(); - - if (!conf->isSharded(nss.ns())) { - return _runCommand(txn, conf, nullptr, conf->getPrimaryId(), nss, cmdObj, result); + auto routingInfo = + uassertStatusOK(Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfo(opCtx, nss)); + if (!routingInfo.cm()) { + return _runCommand(opCtx, nullptr, routingInfo.primaryId(), nss, cmdObj, result); } - shared_ptr<ChunkManager> chunkMgr = _getChunkManager(txn, conf, nss); + const auto chunkMgr = routingInfo.cm(); const BSONObj query = cmdObj.getObjectField("query"); @@ -194,40 +187,26 @@ public: return appendCommandStatus(result, collationElementStatus); } - StatusWith<BSONObj> status = _getShardKey(txn, chunkMgr, query); - if (!status.isOK()) { - // Bad query - return appendCommandStatus(result, status.getStatus()); - } + BSONObj shardKey = uassertStatusOK(_getShardKey(opCtx, *chunkMgr, query)); - BSONObj shardKey = status.getValue(); auto chunk = chunkMgr->findIntersectingChunk(shardKey, collation); - const bool ok = _runCommand(txn, conf, chunkMgr, chunk->getShardId(), nss, cmdObj, result); + const bool ok = _runCommand(opCtx, chunkMgr, chunk->getShardId(), nss, cmdObj, result); if (ok) { updateChunkWriteStatsAndSplitIfNeeded( - txn, chunkMgr.get(), chunk.get(), cmdObj.getObjectField("update").objsize()); + opCtx, chunkMgr.get(), chunk.get(), cmdObj.getObjectField("update").objsize()); } return ok; } private: - shared_ptr<ChunkManager> _getChunkManager(OperationContext* txn, - DBConfig* conf, - const NamespaceString& nss) const { - shared_ptr<ChunkManager> chunkMgr = conf->getChunkManager(txn, nss.ns()); - massert(13002, "shard internal error chunk manager should never be null", chunkMgr); - - return chunkMgr; - } - - StatusWith<BSONObj> _getShardKey(OperationContext* txn, - shared_ptr<ChunkManager> chunkMgr, - const BSONObj& query) const { + static StatusWith<BSONObj> _getShardKey(OperationContext* 
opCtx, + const ChunkManager& chunkMgr, + const BSONObj& query) { // Verify that the query has an equality predicate using the shard key StatusWith<BSONObj> status = - chunkMgr->getShardKeyPattern().extractShardKeyFromQuery(txn, query); + chunkMgr.getShardKeyPattern().extractShardKeyFromQuery(opCtx, query); if (!status.isOK()) { return status; @@ -243,19 +222,19 @@ private: return shardKey; } - bool _runCommand(OperationContext* txn, - DBConfig* conf, - shared_ptr<ChunkManager> chunkManager, - const ShardId& shardId, - const NamespaceString& nss, - const BSONObj& cmdObj, - BSONObjBuilder& result) const { + static bool _runCommand(OperationContext* opCtx, + shared_ptr<ChunkManager> chunkManager, + const ShardId& shardId, + const NamespaceString& nss, + const BSONObj& cmdObj, + BSONObjBuilder& result) { BSONObj res; - const auto shard = uassertStatusOK(Grid::get(txn)->shardRegistry()->getShard(txn, shardId)); + const auto shard = + uassertStatusOK(Grid::get(opCtx)->shardRegistry()->getShard(opCtx, shardId)); ShardConnection conn(shard->getConnString(), nss.ns(), chunkManager); - bool ok = conn->runCommand(conf->name(), cmdObj, res); + bool ok = conn->runCommand(nss.db().toString(), cmdObj, res); conn.done(); // ErrorCodes::RecvStaleConfig is the code for RecvStaleConfigException. 
diff --git a/src/mongo/s/commands/cluster_flush_router_config_cmd.cpp b/src/mongo/s/commands/cluster_flush_router_config_cmd.cpp index 4f489ab28c1..8931166863c 100644 --- a/src/mongo/s/commands/cluster_flush_router_config_cmd.cpp +++ b/src/mongo/s/commands/cluster_flush_router_config_cmd.cpp @@ -64,13 +64,13 @@ public: out->push_back(Privilege(ResourcePattern::forClusterResource(), actions)); } - virtual bool run(OperationContext* txn, + virtual bool run(OperationContext* opCtx, const std::string& dbname, BSONObj& cmdObj, int options, std::string& errmsg, BSONObjBuilder& result) { - Grid::get(txn)->catalogCache()->invalidateAll(); + Grid::get(opCtx)->catalogCache()->purgeAllDatabases(); result.appendBool("flushed", true); return true; diff --git a/src/mongo/s/commands/cluster_get_shard_version_cmd.cpp b/src/mongo/s/commands/cluster_get_shard_version_cmd.cpp index 3ab79ef5364..2bb23453bba 100644 --- a/src/mongo/s/commands/cluster_get_shard_version_cmd.cpp +++ b/src/mongo/s/commands/cluster_get_shard_version_cmd.cpp @@ -35,8 +35,9 @@ #include "mongo/db/auth/authorization_manager.h" #include "mongo/db/auth/authorization_session.h" #include "mongo/db/commands.h" +#include "mongo/s/catalog_cache.h" +#include "mongo/s/commands/cluster_commands_common.h" #include "mongo/s/grid.h" -#include "mongo/s/sharding_raii.h" #include "mongo/util/log.h" namespace mongo { @@ -78,7 +79,7 @@ public: return parseNsFullyQualified(dbname, cmdObj); } - bool run(OperationContext* txn, + bool run(OperationContext* opCtx, const std::string& dbname, BSONObj& cmdObj, int options, @@ -86,13 +87,10 @@ public: BSONObjBuilder& result) override { const NamespaceString nss(parseNs(dbname, cmdObj)); - auto scopedDb = uassertStatusOK(ScopedShardDatabase::getExisting(txn, nss.db())); - auto config = scopedDb.db(); + auto routingInfo = getShardedCollection(opCtx, nss); + const auto cm = routingInfo.cm(); - auto cm = config->getChunkManagerIfExists(txn, nss.ns()); - 
uassert(ErrorCodes::NamespaceNotSharded, "ns [" + nss.ns() + " is not sharded.", cm); - - for (const auto& cmEntry : cm->getChunkMap()) { + for (const auto& cmEntry : cm->chunkMap()) { log() << redact(cmEntry.second->toString()); } diff --git a/src/mongo/s/commands/cluster_map_reduce_cmd.cpp b/src/mongo/s/commands/cluster_map_reduce_cmd.cpp index 6d4a4155365..b155d322b3e 100644 --- a/src/mongo/s/commands/cluster_map_reduce_cmd.cpp +++ b/src/mongo/s/commands/cluster_map_reduce_cmd.cpp @@ -45,27 +45,17 @@ #include "mongo/s/catalog/dist_lock_manager.h" #include "mongo/s/catalog/sharding_catalog_client.h" #include "mongo/s/catalog_cache.h" -#include "mongo/s/chunk_manager.h" #include "mongo/s/client/shard_connection.h" #include "mongo/s/client/shard_registry.h" #include "mongo/s/commands/cluster_commands_common.h" #include "mongo/s/commands/cluster_write.h" #include "mongo/s/commands/sharded_command_processing.h" #include "mongo/s/commands/strategy.h" -#include "mongo/s/config.h" #include "mongo/s/grid.h" -#include "mongo/s/sharding_raii.h" #include "mongo/stdx/chrono.h" #include "mongo/util/log.h" namespace mongo { - -using std::shared_ptr; -using std::map; -using std::set; -using std::string; -using std::vector; - namespace { AtomicUInt32 JOB_NUMBER; @@ -75,7 +65,7 @@ const Milliseconds kNoDistLockTimeout(-1); /** * Generates a unique name for the temporary M/R output collection. */ -string getTmpName(StringData coll) { +std::string getTmpName(StringData coll) { return str::stream() << "tmp.mrs." << coll << "_" << time(0) << "_" << JOB_NUMBER.fetchAndAdd(1); } @@ -85,14 +75,14 @@ string getTmpName(StringData coll) { * be sent to the shards as part of the first phase of map/reduce. 
*/ BSONObj fixForShards(const BSONObj& orig, - const string& output, - string& badShardedField, + const std::string& output, + std::string& badShardedField, int maxChunkSizeBytes) { BSONObjBuilder b; BSONObjIterator i(orig); while (i.more()) { BSONElement e = i.next(); - const string fn = e.fieldName(); + const std::string fn = e.fieldName(); if (fn == bypassDocumentValidationCommandOption() || fn == "map" || fn == "mapreduce" || fn == "mapReduce" || fn == "mapparams" || fn == "reduce" || fn == "query" || @@ -160,47 +150,49 @@ class MRCmd : public Command { public: MRCmd() : Command("mapReduce", false, "mapreduce") {} - virtual bool slaveOk() const { + bool slaveOk() const override { return true; } - virtual bool adminOnly() const { + bool adminOnly() const override { return false; } - virtual bool supportsWriteConcern(const BSONObj& cmd) const override { + std::string parseNs(const std::string& dbname, const BSONObj& cmdObj) const override { + return parseNsCollectionRequired(dbname, cmdObj).ns(); + } + + bool supportsWriteConcern(const BSONObj& cmd) const override { return mr::mrSupportsWriteConcern(cmd); } - virtual void help(std::stringstream& help) const { + void help(std::stringstream& help) const override { help << "Runs the sharded map/reduce command"; } - virtual void addRequiredPrivileges(const std::string& dbname, - const BSONObj& cmdObj, - std::vector<Privilege>* out) { + void addRequiredPrivileges(const std::string& dbname, + const BSONObj& cmdObj, + std::vector<Privilege>* out) override { mr::addPrivilegesRequiredForMapReduce(this, dbname, cmdObj, out); } - virtual bool run(OperationContext* txn, - const std::string& dbname, - BSONObj& cmdObj, - int options, - std::string& errmsg, - BSONObjBuilder& result) { + bool run(OperationContext* opCtx, + const std::string& dbname, + BSONObj& cmdObj, + int options, + std::string& errmsg, + BSONObjBuilder& result) override { Timer t; const NamespaceString nss(parseNs(dbname, cmdObj)); - 
uassert(ErrorCodes::InvalidNamespace, "Invalid namespace", nss.isValid()); - - const string shardResultCollection = getTmpName(nss.coll()); + const std::string shardResultCollection = getTmpName(nss.coll()); bool shardedOutput = false; - NamespaceString outputCollNss; bool customOutDB = false; + NamespaceString outputCollNss; bool inlineOutput = false; - string outDB = dbname; + std::string outDB = dbname; BSONElement outElmt = cmdObj.getField("out"); if (outElmt.type() == Object) { @@ -218,7 +210,8 @@ public: !customOut.hasField("db")); } else { // Mode must be 1st element - const string finalColShort = customOut.firstElement().str(); + const std::string finalColShort = customOut.firstElement().str(); + if (customOut.hasField("db")) { customOutDB = true; outDB = customOut.getField("db").str(); @@ -231,77 +224,71 @@ public: } } - // Ensure the input database exists - auto status = Grid::get(txn)->catalogCache()->getDatabase(txn, dbname); - if (!status.isOK()) { - return appendCommandStatus(result, status.getStatus()); - } + auto const catalogCache = Grid::get(opCtx)->catalogCache(); - shared_ptr<DBConfig> confIn = status.getValue(); + // Ensure the input database exists and set up the input collection + auto inputRoutingInfo = uassertStatusOK(catalogCache->getCollectionRoutingInfo(opCtx, nss)); - shared_ptr<DBConfig> confOut; - if (customOutDB) { - // Create the output database implicitly, since we have a custom output requested - auto scopedDb = uassertStatusOK(ScopedShardDatabase::getOrCreate(txn, outDB)); - confOut = scopedDb.getSharedDbReference(); - } else { - confOut = confIn; - } + const bool shardedInput = inputRoutingInfo.cm() != nullptr; - if (confOut->getPrimaryId() == "config" && !inlineOutput) { - return appendCommandStatus( - result, - Status(ErrorCodes::CommandNotSupported, - str::stream() << "Can not execute mapReduce with output database " << outDB - << " which lives on config servers")); + // Create the output database implicitly if we have a 
custom output requested + if (customOutDB) { + uassertStatusOK(createShardDatabase(opCtx, outDB)); } - const bool shardedInput = confIn && confIn->isSharded(nss.ns()); - - if (!shardedOutput) { - uassert(15920, - "Cannot output to a non-sharded collection because " - "sharded collection exists already", - !confOut->isSharded(outputCollNss.ns())); - - // TODO: Should we also prevent going from non-sharded to sharded? During the - // transition client may see partial data. - } + // Ensure that the output database doesn't reside on the config server + auto outputDbInfo = uassertStatusOK(catalogCache->getDatabase(opCtx, outDB)); + uassert(ErrorCodes::CommandNotSupported, + str::stream() << "Can not execute mapReduce with output database " << outDB + << " which lives on config servers", + inlineOutput || outputDbInfo.primaryId() != "config"); int64_t maxChunkSizeBytes = 0; + if (shardedOutput) { // Will need to figure out chunks, ask shards for points maxChunkSizeBytes = cmdObj["maxChunkSizeBytes"].numberLong(); if (maxChunkSizeBytes == 0) { maxChunkSizeBytes = - Grid::get(txn)->getBalancerConfiguration()->getMaxChunkSizeBytes(); + Grid::get(opCtx)->getBalancerConfiguration()->getMaxChunkSizeBytes(); } // maxChunkSizeBytes is sent as int BSON field invariant(maxChunkSizeBytes < std::numeric_limits<int>::max()); + } else if (outputCollNss.isValid()) { + auto outputRoutingInfo = + uassertStatusOK(catalogCache->getCollectionRoutingInfo(opCtx, outputCollNss)); + + uassert(15920, + "Cannot output to a non-sharded collection because " + "sharded collection exists already", + !outputRoutingInfo.cm()); + + // TODO: Should we also prevent going from non-sharded to sharded? During the + // transition client may see partial data. 
} - const auto shardRegistry = Grid::get(txn)->shardRegistry(); + const auto shardRegistry = Grid::get(opCtx)->shardRegistry(); // modify command to run on shards with output to tmp collection - string badShardedField; + std::string badShardedField; BSONObj shardedCommand = fixForShards(cmdObj, shardResultCollection, badShardedField, maxChunkSizeBytes); if (!shardedInput && !shardedOutput && !customOutDB) { LOG(1) << "simple MR, just passthrough"; - const auto shard = - uassertStatusOK(shardRegistry->getShard(txn, confIn->getPrimaryId())); + invariant(inputRoutingInfo.primary()); - ShardConnection conn(shard->getConnString(), ""); + ShardConnection conn(inputRoutingInfo.primary()->getConnString(), ""); BSONObj res; bool ok = conn->runCommand(dbname, cmdObj, res); conn.done(); if (auto wcErrorElem = res["writeConcernError"]) { - appendWriteConcernErrorToCmdResponse(shard->getId(), wcErrorElem, result); + appendWriteConcernErrorToCmdResponse( + inputRoutingInfo.primary()->getId(), wcErrorElem, result); } result.appendElementsUnique(res); @@ -323,12 +310,13 @@ public: collation = cmdObj["collation"].embeddedObjectUserCheck(); } - set<string> servers; - vector<Strategy::CommandResult> mrCommandResults; + std::set<std::string> servers; + std::vector<Strategy::CommandResult> mrCommandResults; BSONObjBuilder shardResultsB; BSONObjBuilder shardCountsB; - map<string, int64_t> countsMap; + std::map<std::string, int64_t> countsMap; + auto splitPts = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); { @@ -338,7 +326,7 @@ public: try { Strategy::commandOp( - txn, dbname, shardedCommand, 0, nss.ns(), q, collation, &mrCommandResults); + opCtx, dbname, shardedCommand, 0, nss.ns(), q, collation, &mrCommandResults); } catch (DBException& e) { e.addContext(str::stream() << "could not run map command on all shards for ns " << nss.ns() @@ -349,12 +337,12 @@ public: for (const auto& mrResult : mrCommandResults) { // Need to gather list of all servers even if an error happened - 
string server; - { + const auto server = [&]() { const auto shard = - uassertStatusOK(shardRegistry->getShard(txn, mrResult.shardTargetId)); - server = shard->getConnString().toString(); - } + uassertStatusOK(shardRegistry->getShard(opCtx, mrResult.shardTargetId)); + return shard->getConnString().toString(); + }(); + servers.insert(server); if (!ok) { @@ -386,15 +374,14 @@ public: if (singleResult.hasField("splitKeys")) { BSONElement splitKeys = singleResult.getField("splitKeys"); - vector<BSONElement> pts = splitKeys.Array(); - for (vector<BSONElement>::iterator it = pts.begin(); it != pts.end(); ++it) { - splitPts.insert(it->Obj().getOwned()); + for (const auto& splitPt : splitKeys.Array()) { + splitPts.insert(splitPt.Obj().getOwned()); } } } if (!ok) { - _cleanUp(servers, dbname, shardResultCollection); + cleanUp(servers, dbname, shardResultCollection); // Add "code" to the top-level response, if the failure of the sharded command // can be accounted to a single error. @@ -413,7 +400,7 @@ public: finalCmd.append("inputDB", dbname); finalCmd.append("shardedOutputCollection", shardResultCollection); finalCmd.append("shards", shardResultsB.done()); - finalCmd.append("writeConcern", txn->getWriteConcern().toBSON()); + finalCmd.append("writeConcern", opCtx->getWriteConcern().toBSON()); BSONObj shardCounts = shardCountsB.done(); finalCmd.append("shardCounts", shardCounts); @@ -442,16 +429,15 @@ public: bool ok = true; BSONObj singleResult; - bool hasWCError = false; if (!shardedOutput) { - const auto shard = - uassertStatusOK(shardRegistry->getShard(txn, confOut->getPrimaryId())); + LOG(1) << "MR with single shard output, NS=" << outputCollNss + << " primary=" << outputDbInfo.primaryId(); - LOG(1) << "MR with single shard output, NS=" << outputCollNss.ns() - << " primary=" << shard->toString(); + const auto outputShard = + uassertStatusOK(shardRegistry->getShard(opCtx, outputDbInfo.primaryId())); - ShardConnection conn(shard->getConnString(), outputCollNss.ns()); + 
ShardConnection conn(outputShard->getConnString(), outputCollNss.ns()); ok = conn->runCommand(outDB, finalCmd.obj(), singleResult); BSONObj counts = singleResult.getObjectField("counts"); @@ -460,87 +446,27 @@ public: outputCount = counts.getIntField("output"); conn.done(); - if (!hasWCError) { - if (auto wcErrorElem = singleResult["writeConcernError"]) { - appendWriteConcernErrorToCmdResponse(shard->getId(), wcErrorElem, result); - hasWCError = true; - } + + if (auto wcErrorElem = singleResult["writeConcernError"]) { + appendWriteConcernErrorToCmdResponse(outputShard->getId(), wcErrorElem, result); } } else { LOG(1) << "MR with sharded output, NS=" << outputCollNss.ns(); - // Create the sharded collection if needed - if (!confOut->isSharded(outputCollNss.ns())) { - // Enable sharding on the output db - Status status = Grid::get(txn)->catalogClient(txn)->enableSharding( - txn, outputCollNss.db().toString()); - - // If the database has sharding already enabled, we can ignore the error - if (status.isOK()) { - // Invalidate the output database so it gets reloaded on the next fetch attempt - Grid::get(txn)->catalogCache()->invalidate(outputCollNss.db()); - } else if (status != ErrorCodes::AlreadyInitialized) { - uassertStatusOK(status); - } - - confOut.reset(); - confOut = uassertStatusOK(Grid::get(txn)->catalogCache()->getDatabase( - txn, outputCollNss.db().toString())); + auto outputRoutingInfo = + uassertStatusOK(catalogCache->getCollectionRoutingInfo(opCtx, outputCollNss)); - // Shard collection according to split points - vector<BSONObj> sortedSplitPts; - - // Points will be properly sorted using the set - for (const auto& splitPt : splitPts) { - sortedSplitPts.push_back(splitPt); - } - - // Pre-split the collection onto all the shards for this database. 
Note that - // it's not completely safe to pre-split onto non-primary shards using the - // shardcollection method (a conflict may result if multiple map-reduces are - // writing to the same output collection, for instance). - // - // TODO: pre-split mapReduce output in a safer way. - - const std::set<ShardId> outShardIds = [&]() { - std::vector<ShardId> shardIds; - shardRegistry->getAllShardIds(&shardIds); - uassert(ErrorCodes::ShardNotFound, - str::stream() - << "Unable to find shards on which to place output collection " - << outputCollNss.ns(), - !shardIds.empty()); - - return std::set<ShardId>(shardIds.begin(), shardIds.end()); - }(); - - - BSONObj sortKey = BSON("_id" << 1); - ShardKeyPattern sortKeyPattern(sortKey); - - // The collection default collation for the output collection. This is empty, - // representing the simple binary comparison collation. - BSONObj defaultCollation; - - uassertStatusOK( - Grid::get(txn)->catalogClient(txn)->shardCollection(txn, - outputCollNss.ns(), - sortKeyPattern, - defaultCollation, - true, - sortedSplitPts, - outShardIds)); - - // Make sure the cached metadata for the collection knows that we are now sharded - confOut->getChunkManager(txn, outputCollNss.ns(), true /* reload */); + // Create the sharded collection if needed + if (!outputRoutingInfo.cm()) { + outputRoutingInfo = createShardedOutputCollection(opCtx, outputCollNss, splitPts); } auto chunkSizes = SimpleBSONObjComparator::kInstance.makeBSONObjIndexedMap<int>(); { // Take distributed lock to prevent split / migration. 
auto scopedDistLock = - Grid::get(txn)->catalogClient(txn)->getDistLockManager()->lock( - txn, outputCollNss.ns(), "mr-post-process", kNoDistLockTimeout); + Grid::get(opCtx)->catalogClient(opCtx)->getDistLockManager()->lock( + opCtx, outputCollNss.ns(), "mr-post-process", kNoDistLockTimeout); if (!scopedDistLock.isOK()) { return appendCommandStatus(result, scopedDistLock.getStatus()); } @@ -550,7 +476,7 @@ public: try { const BSONObj query; - Strategy::commandOp(txn, + Strategy::commandOp(opCtx, outDB, finalCmdObj, 0, @@ -567,13 +493,16 @@ public: throw; } + bool hasWCError = false; + for (const auto& mrResult : mrCommandResults) { - string server; - { - const auto shard = uassertStatusOK( - Grid::get(txn)->shardRegistry()->getShard(txn, mrResult.shardTargetId)); - server = shard->getConnString().toString(); - } + const auto server = [&]() { + const auto shard = + uassertStatusOK(Grid::get(opCtx)->shardRegistry()->getShard( + opCtx, mrResult.shardTargetId)); + return shard->getConnString().toString(); + }(); + singleResult = mrResult.result; if (!hasWCError) { if (auto wcErrorElem = singleResult["writeConcernError"]) { @@ -596,7 +525,8 @@ public: // get the size inserted for each chunk // split cannot be called here since we already have the distributed lock if (singleResult.hasField("chunkSizes")) { - vector<BSONElement> sizes = singleResult.getField("chunkSizes").Array(); + std::vector<BSONElement> sizes = + singleResult.getField("chunkSizes").Array(); for (unsigned int i = 0; i < sizes.size(); i += 2) { BSONObj key = sizes[i].Obj().getOwned(); const long long size = sizes[i + 1].numberLong(); @@ -609,33 +539,37 @@ public: } // Do the splitting round - shared_ptr<ChunkManager> cm = confOut->getChunkManagerIfExists(txn, outputCollNss.ns()); + catalogCache->onStaleConfigError(std::move(outputRoutingInfo)); + outputRoutingInfo = + uassertStatusOK(catalogCache->getCollectionRoutingInfo(opCtx, outputCollNss)); uassert(34359, str::stream() << "Failed to write mapreduce 
output to " << outputCollNss.ns() << "; expected that collection to be sharded, but it was not", - cm); + outputRoutingInfo.cm()); + + const auto outputCM = outputRoutingInfo.cm(); for (const auto& chunkSize : chunkSizes) { BSONObj key = chunkSize.first; const int size = chunkSize.second; invariant(size < std::numeric_limits<int>::max()); - // key reported should be the chunk's minimum - shared_ptr<Chunk> c = cm->findIntersectingChunkWithSimpleCollation(key); + // Key reported should be the chunk's minimum + auto c = outputCM->findIntersectingChunkWithSimpleCollation(key); if (!c) { warning() << "Mongod reported " << size << " bytes inserted for key " << key << " but can't find chunk"; } else { - updateChunkWriteStatsAndSplitIfNeeded(txn, cm.get(), c.get(), size); + updateChunkWriteStatsAndSplitIfNeeded(opCtx, outputCM.get(), c.get(), size); } } } - _cleanUp(servers, dbname, shardResultCollection); + cleanUp(servers, dbname, shardResultCollection); if (!ok) { errmsg = str::stream() << "MR post processing failed: " << singleResult.toString(); - return 0; + return false; } // copy some elements from a single result @@ -670,9 +604,69 @@ public: private: /** + * Creates and shards the collection for the output results. 
+ */ + static CachedCollectionRoutingInfo createShardedOutputCollection(OperationContext* opCtx, + const NamespaceString& nss, + const BSONObjSet& splitPts) { + auto const catalogClient = Grid::get(opCtx)->catalogClient(opCtx); + auto const catalogCache = Grid::get(opCtx)->catalogCache(); + auto const shardRegistry = Grid::get(opCtx)->shardRegistry(); + + // Enable sharding on the output db + Status status = catalogClient->enableSharding(opCtx, nss.db().toString()); + + // If the database has sharding already enabled, we can ignore the error + if (status.isOK()) { + // Invalidate the output database so it gets reloaded on the next fetch attempt + catalogCache->purgeDatabase(nss.db()); + } else if (status != ErrorCodes::AlreadyInitialized) { + uassertStatusOK(status); + } + + // Points will be properly sorted using the set + const std::vector<BSONObj> sortedSplitPts(splitPts.begin(), splitPts.end()); + + // Pre-split the collection onto all the shards for this database. Note that + // it's not completely safe to pre-split onto non-primary shards using the + // shardcollection method (a conflict may result if multiple map-reduces are + // writing to the same output collection, for instance). + // + // TODO: pre-split mapReduce output in a safer way. + + const std::set<ShardId> outShardIds = [&]() { + std::vector<ShardId> shardIds; + shardRegistry->getAllShardIds(&shardIds); + uassert(ErrorCodes::ShardNotFound, + str::stream() << "Unable to find shards on which to place output collection " + << nss.ns(), + !shardIds.empty()); + + return std::set<ShardId>(shardIds.begin(), shardIds.end()); + }(); + + + BSONObj sortKey = BSON("_id" << 1); + ShardKeyPattern sortKeyPattern(sortKey); + + // The collection default collation for the output collection. This is empty, + // representing the simple binary comparison collation. 
+ BSONObj defaultCollation; + + uassertStatusOK(Grid::get(opCtx)->catalogClient(opCtx)->shardCollection( + opCtx, nss.ns(), sortKeyPattern, defaultCollation, true, sortedSplitPts, outShardIds)); + + // Make sure the cached metadata for the collection knows that we are now sharded + catalogCache->invalidateShardedCollection(nss); + return uassertStatusOK(catalogCache->getCollectionRoutingInfo(opCtx, nss)); + } + + /** * Drops the temporary results collections from each shard. */ - void _cleanUp(const set<string>& servers, string dbName, string shardResultCollection) { + static void cleanUp(const std::set<std::string>& servers, + const std::string& dbName, + const std::string& shardResultCollection) { try { // drop collections with tmp results on each shard for (const auto& server : servers) { diff --git a/src/mongo/s/commands/cluster_merge_chunks_cmd.cpp b/src/mongo/s/commands/cluster_merge_chunks_cmd.cpp index 2aaeeaeabb0..2a85b645df0 100644 --- a/src/mongo/s/commands/cluster_merge_chunks_cmd.cpp +++ b/src/mongo/s/commands/cluster_merge_chunks_cmd.cpp @@ -37,12 +37,10 @@ #include "mongo/db/namespace_string.h" #include "mongo/s/catalog/sharding_catalog_client.h" #include "mongo/s/catalog_cache.h" -#include "mongo/s/chunk_manager.h" #include "mongo/s/client/shard_connection.h" #include "mongo/s/client/shard_registry.h" -#include "mongo/s/config.h" +#include "mongo/s/commands/cluster_commands_common.h" #include "mongo/s/grid.h" -#include "mongo/s/sharding_raii.h" namespace mongo { @@ -60,14 +58,14 @@ class ClusterMergeChunksCommand : public Command { public: ClusterMergeChunksCommand() : Command("mergeChunks") {} - virtual void help(stringstream& h) const { + void help(stringstream& h) const override { h << "Merge Chunks command\n" << "usage: { mergeChunks : <ns>, bounds : [ <min key>, <max key> ] }"; } - virtual Status checkAuthForCommand(Client* client, - const std::string& dbname, - const BSONObj& cmdObj) { + Status checkAuthForCommand(Client* client, + const 
std::string& dbname, + const BSONObj& cmdObj) override { if (!AuthorizationSession::get(client)->isAuthorizedForActionsOnResource( ResourcePattern::forExactNamespace(NamespaceString(parseNs(dbname, cmdObj))), ActionType::splitChunk)) { @@ -76,17 +74,19 @@ public: return Status::OK(); } - virtual std::string parseNs(const std::string& dbname, const BSONObj& cmdObj) const { + std::string parseNs(const std::string& dbname, const BSONObj& cmdObj) const override { return parseNsFullyQualified(dbname, cmdObj); } - virtual bool adminOnly() const { + bool adminOnly() const override { return true; } - virtual bool slaveOk() const { + + bool slaveOk() const override { return false; } - virtual bool supportsWriteConcern(const BSONObj& cmd) const override { + + bool supportsWriteConcern(const BSONObj& cmd) const override { return false; } @@ -99,15 +99,18 @@ public: static BSONField<string> configField; - bool run(OperationContext* txn, + bool run(OperationContext* opCtx, const string& dbname, BSONObj& cmdObj, int, string& errmsg, - BSONObjBuilder& result) { + BSONObjBuilder& result) override { const NamespaceString nss(parseNs(dbname, cmdObj)); - auto scopedCM = uassertStatusOK(ScopedChunkManager::refreshAndGet(txn, nss)); + auto routingInfo = uassertStatusOK( + Grid::get(opCtx)->catalogCache()->getShardedCollectionRoutingInfoWithRefresh(opCtx, + nss)); + const auto cm = routingInfo.cm(); vector<BSONObj> bounds; if (!FieldParser::extract(cmdObj, boundsField, &bounds, &errmsg)) { @@ -137,8 +140,6 @@ public: return false; } - auto const cm = scopedCM.cm(); - if (!cm->getShardKeyPattern().isShardKey(minKey) || !cm->getShardKeyPattern().isShardKey(maxKey)) { errmsg = stream() << "shard key bounds " @@ -158,7 +159,7 @@ public: remoteCmdObjB.append(cmdObj[ClusterMergeChunksCommand::boundsField()]); remoteCmdObjB.append( ClusterMergeChunksCommand::configField(), - Grid::get(txn)->shardRegistry()->getConfigServerConnectionString().toString()); + 
Grid::get(opCtx)->shardRegistry()->getConfigServerConnectionString().toString()); remoteCmdObjB.append(ClusterMergeChunksCommand::shardNameField(), firstChunk->getShardId().toString()); @@ -167,7 +168,7 @@ public: // Throws, but handled at level above. Don't want to rewrap to preserve exception // formatting. const auto shardStatus = - Grid::get(txn)->shardRegistry()->getShard(txn, firstChunk->getShardId()); + Grid::get(opCtx)->shardRegistry()->getShard(opCtx, firstChunk->getShardId()); if (!shardStatus.isOK()) { return appendCommandStatus( result, @@ -179,6 +180,8 @@ public: bool ok = conn->runCommand("admin", remoteCmdObjB.obj(), remoteResult); conn.done(); + Grid::get(opCtx)->catalogCache()->onStaleConfigError(std::move(routingInfo)); + result.appendElements(remoteResult); return ok; } diff --git a/src/mongo/s/commands/cluster_move_chunk_cmd.cpp b/src/mongo/s/commands/cluster_move_chunk_cmd.cpp index f0e4e7f6973..23351cf92a5 100644 --- a/src/mongo/s/commands/cluster_move_chunk_cmd.cpp +++ b/src/mongo/s/commands/cluster_move_chunk_cmd.cpp @@ -40,12 +40,11 @@ #include "mongo/db/write_concern_options.h" #include "mongo/s/balancer_configuration.h" #include "mongo/s/catalog_cache.h" -#include "mongo/s/client/shard_connection.h" #include "mongo/s/client/shard_registry.h" +#include "mongo/s/commands/cluster_commands_common.h" #include "mongo/s/config_server_client.h" #include "mongo/s/grid.h" #include "mongo/s/migration_secondary_throttle_options.h" -#include "mongo/s/sharding_raii.h" #include "mongo/util/log.h" #include "mongo/util/timer.h" @@ -60,19 +59,19 @@ class MoveChunkCmd : public Command { public: MoveChunkCmd() : Command("moveChunk", false, "movechunk") {} - virtual bool slaveOk() const { + bool slaveOk() const override { return true; } - virtual bool adminOnly() const { + bool adminOnly() const override { return true; } - virtual bool supportsWriteConcern(const BSONObj& cmd) const override { + bool supportsWriteConcern(const BSONObj& cmd) const override { 
return true; } - virtual void help(std::stringstream& help) const { + void help(std::stringstream& help) const override { help << "Example: move chunk that contains the doc {num : 7} to shard001\n" << " { movechunk : 'test.foo' , find : { num : 7 } , to : 'shard0001' }\n" << "Example: move chunk with lower bound 0 and upper bound 10 to shard001\n" @@ -80,9 +79,9 @@ public: << " , to : 'shard001' }\n"; } - virtual Status checkAuthForCommand(Client* client, - const std::string& dbname, - const BSONObj& cmdObj) { + Status checkAuthForCommand(Client* client, + const std::string& dbname, + const BSONObj& cmdObj) override { if (!AuthorizationSession::get(client)->isAuthorizedForActionsOnResource( ResourcePattern::forExactNamespace(NamespaceString(parseNs(dbname, cmdObj))), ActionType::moveChunk)) { @@ -92,21 +91,24 @@ public: return Status::OK(); } - virtual std::string parseNs(const std::string& dbname, const BSONObj& cmdObj) const { + std::string parseNs(const std::string& dbname, const BSONObj& cmdObj) const override { return parseNsFullyQualified(dbname, cmdObj); } - virtual bool run(OperationContext* txn, - const std::string& dbname, - BSONObj& cmdObj, - int options, - std::string& errmsg, - BSONObjBuilder& result) { + bool run(OperationContext* opCtx, + const std::string& dbname, + BSONObj& cmdObj, + int options, + std::string& errmsg, + BSONObjBuilder& result) override { Timer t; const NamespaceString nss(parseNs(dbname, cmdObj)); - auto scopedCM = uassertStatusOK(ScopedChunkManager::refreshAndGet(txn, nss)); + auto routingInfo = uassertStatusOK( + Grid::get(opCtx)->catalogCache()->getShardedCollectionRoutingInfoWithRefresh(opCtx, + nss)); + const auto cm = routingInfo.cm(); const string toString = cmdObj["to"].valuestrsafe(); if (!toString.size()) { @@ -114,7 +116,7 @@ public: return false; } - const auto toStatus = Grid::get(txn)->shardRegistry()->getShard(txn, toString); + const auto toStatus = Grid::get(opCtx)->shardRegistry()->getShard(opCtx, toString); if 
(!toStatus.isOK()) { string msg(str::stream() << "Could not move chunk in '" << nss.ns() << "' to shard '" << toString @@ -128,7 +130,8 @@ public: // so far, chunk size serves test purposes; it may or may not become a supported parameter long long maxChunkSizeBytes = cmdObj["maxChunkSizeBytes"].numberLong(); if (maxChunkSizeBytes == 0) { - maxChunkSizeBytes = Grid::get(txn)->getBalancerConfiguration()->getMaxChunkSizeBytes(); + maxChunkSizeBytes = + Grid::get(opCtx)->getBalancerConfiguration()->getMaxChunkSizeBytes(); } BSONObj find = cmdObj.getObjectField("find"); @@ -140,14 +143,12 @@ public: return false; } - auto const cm = scopedCM.cm(); - shared_ptr<Chunk> chunk; if (!find.isEmpty()) { // find BSONObj shardKey = - uassertStatusOK(cm->getShardKeyPattern().extractShardKeyFromQuery(txn, find)); + uassertStatusOK(cm->getShardKeyPattern().extractShardKeyFromQuery(opCtx, find)); if (shardKey.isEmpty()) { errmsg = str::stream() << "no shard key found in chunk query " << find; return false; @@ -187,16 +188,14 @@ public: chunkType.setShard(chunk->getShardId()); chunkType.setVersion(cm->getVersion()); - uassertStatusOK(configsvr_client::moveChunk(txn, + uassertStatusOK(configsvr_client::moveChunk(opCtx, chunkType, to->getId(), maxChunkSizeBytes, secondaryThrottle, cmdObj["_waitForDelete"].trueValue())); - // Proactively refresh the chunk manager. Not strictly necessary, but this way it's - // immediately up-to-date the next time it's used. 
- scopedCM.db()->getChunkManagerIfExists(txn, nss.ns(), true); + Grid::get(opCtx)->catalogCache()->onStaleConfigError(std::move(routingInfo)); result.append("millis", t.millis()); return true; diff --git a/src/mongo/s/commands/cluster_move_primary_cmd.cpp b/src/mongo/s/commands/cluster_move_primary_cmd.cpp index fefd9bbd47d..28e79d1a962 100644 --- a/src/mongo/s/commands/cluster_move_primary_cmd.cpp +++ b/src/mongo/s/commands/cluster_move_primary_cmd.cpp @@ -49,7 +49,6 @@ #include "mongo/s/client/shard_registry.h" #include "mongo/s/commands/cluster_commands_common.h" #include "mongo/s/commands/sharded_command_processing.h" -#include "mongo/s/config.h" #include "mongo/s/grid.h" #include "mongo/s/set_shard_version_request.h" #include "mongo/util/log.h" @@ -98,7 +97,7 @@ public: return cmdObj.firstElement().str(); } - virtual bool run(OperationContext* txn, + virtual bool run(OperationContext* opCtx, const std::string& dbname_unused, BSONObj& cmdObj, int options, @@ -117,14 +116,14 @@ public: return false; } - auto const catalogClient = Grid::get(txn)->catalogClient(txn); - auto const catalogCache = Grid::get(txn)->catalogCache(); - auto const shardRegistry = Grid::get(txn)->shardRegistry(); + auto const catalogClient = Grid::get(opCtx)->catalogClient(opCtx); + auto const catalogCache = Grid::get(opCtx)->catalogCache(); + auto const shardRegistry = Grid::get(opCtx)->shardRegistry(); // Flush all cached information. This can't be perfect, but it's better than nothing. 
- catalogCache->invalidate(dbname); + catalogCache->purgeDatabase(dbname); - auto config = uassertStatusOK(catalogCache->getDatabase(txn, dbname)); + auto dbInfo = uassertStatusOK(catalogCache->getDatabase(opCtx, dbname)); const string to = cmdObj["to"].valuestrsafe(); if (!to.size()) { @@ -132,11 +131,9 @@ public: return false; } - const auto fromShard = - uassertStatusOK(shardRegistry->getShard(txn, config->getPrimaryId())); - + const auto fromShard = uassertStatusOK(shardRegistry->getShard(opCtx, dbInfo.primaryId())); const auto toShard = [&]() { - auto toShardStatus = shardRegistry->getShard(txn, to); + auto toShardStatus = shardRegistry->getShard(opCtx, to); if (!toShardStatus.isOK()) { const std::string msg( str::stream() << "Could not move database '" << dbname << "' to shard '" << to @@ -158,13 +155,13 @@ public: const std::string whyMessage(str::stream() << "Moving primary shard of " << dbname); auto scopedDistLock = uassertStatusOK(catalogClient->getDistLockManager()->lock( - txn, dbname + "-movePrimary", whyMessage, DistLockManager::kDefaultLockTimeout)); + opCtx, dbname + "-movePrimary", whyMessage, DistLockManager::kDefaultLockTimeout)); - const auto shardedColls = getAllShardedCollectionsForDb(txn, dbname); + const auto shardedColls = getAllShardedCollectionsForDb(opCtx, dbname); // Record start in changelog catalogClient->logChange( - txn, + opCtx, "movePrimary.start", dbname, _buildMoveLogEntry(dbname, fromShard->toString(), toShard->toString(), shardedColls), @@ -203,7 +200,7 @@ public: << bypassDocumentValidationCommandOption() << true << "writeConcern" - << txn->getWriteConcern().toBSON()), + << opCtx->getWriteConcern().toBSON()), cloneRes); toconn.done(); @@ -221,15 +218,15 @@ public: // Update the new primary in the config server metadata { - auto dbt = uassertStatusOK(catalogClient->getDatabase(txn, dbname)).value; + auto dbt = uassertStatusOK(catalogClient->getDatabase(opCtx, dbname)).value; dbt.setPrimary(toShard->getId()); - 
uassertStatusOK(catalogClient->updateDatabase(txn, dbname, dbt)); + uassertStatusOK(catalogClient->updateDatabase(opCtx, dbname, dbt)); } // Ensure the next attempt to retrieve the database or any of its collections will do a full // reload - catalogCache->invalidate(dbname); + catalogCache->purgeDatabase(dbname); const string oldPrimary = fromShard->getConnString().toString(); @@ -242,7 +239,7 @@ public: try { BSONObj dropDBInfo; - fromconn->dropDatabase(dbname.c_str(), txn->getWriteConcern(), &dropDBInfo); + fromconn->dropDatabase(dbname.c_str(), opCtx->getWriteConcern(), &dropDBInfo); if (!hasWCError) { if (auto wcErrorElem = dropDBInfo["writeConcernError"]) { appendWriteConcernErrorToCmdResponse( @@ -275,7 +272,7 @@ public: << oldPrimary; BSONObj dropCollInfo; fromconn->dropCollection( - el.String(), txn->getWriteConcern(), &dropCollInfo); + el.String(), opCtx->getWriteConcern(), &dropCollInfo); if (!hasWCError) { if (auto wcErrorElem = dropCollInfo["writeConcernError"]) { appendWriteConcernErrorToCmdResponse( @@ -302,7 +299,7 @@ public: // Record finish in changelog catalogClient->logChange( - txn, + opCtx, "movePrimary", dbname, _buildMoveLogEntry(dbname, oldPrimary, toShard->toString(), shardedColls), diff --git a/src/mongo/s/commands/cluster_plan_cache_cmd.cpp b/src/mongo/s/commands/cluster_plan_cache_cmd.cpp index fea10e3de88..2b553b7f19f 100644 --- a/src/mongo/s/commands/cluster_plan_cache_cmd.cpp +++ b/src/mongo/s/commands/cluster_plan_cache_cmd.cpp @@ -33,11 +33,11 @@ #include "mongo/db/commands.h" #include "mongo/db/query/collation/collation_spec.h" #include "mongo/s/commands/strategy.h" -#include "mongo/s/config.h" #include "mongo/s/grid.h" #include "mongo/s/stale_exception.h" namespace mongo { +namespace { using std::string; using std::stringstream; @@ -153,8 +153,6 @@ bool ClusterPlanCacheCmd::run(OperationContext* txn, // Register plan cache commands at startup // -namespace { - MONGO_INITIALIZER(RegisterPlanCacheCommands)(InitializerContext* 
context) { // Leaked intentionally: a Command registers itself when constructed. @@ -174,5 +172,4 @@ MONGO_INITIALIZER(RegisterPlanCacheCommands)(InitializerContext* context) { } } // namespace - } // namespace mongo diff --git a/src/mongo/s/commands/cluster_shard_collection_cmd.cpp b/src/mongo/s/commands/cluster_shard_collection_cmd.cpp index 79f792e0c7b..f1ba15396a9 100644 --- a/src/mongo/s/commands/cluster_shard_collection_cmd.cpp +++ b/src/mongo/s/commands/cluster_shard_collection_cmd.cpp @@ -53,15 +53,12 @@ #include "mongo/s/balancer_configuration.h" #include "mongo/s/catalog/sharding_catalog_client.h" #include "mongo/s/catalog_cache.h" -#include "mongo/s/chunk_manager.h" #include "mongo/s/client/shard_registry.h" #include "mongo/s/commands/cluster_write.h" -#include "mongo/s/config.h" #include "mongo/s/config_server_client.h" #include "mongo/s/grid.h" #include "mongo/s/migration_secondary_throttle_options.h" #include "mongo/s/shard_util.h" -#include "mongo/s/sharding_raii.h" #include "mongo/util/log.h" namespace mongo { @@ -177,7 +174,7 @@ public: return parseNsFullyQualified(dbname, cmdObj); } - bool run(OperationContext* txn, + bool run(OperationContext* opCtx, const std::string& dbname, BSONObj& cmdObj, int options, @@ -185,21 +182,23 @@ public: BSONObjBuilder& result) override { const NamespaceString nss(parseNs(dbname, cmdObj)); - auto const catalogClient = Grid::get(txn)->catalogClient(txn); - auto const shardRegistry = Grid::get(txn)->shardRegistry(); + auto const catalogClient = Grid::get(opCtx)->catalogClient(opCtx); + auto const shardRegistry = Grid::get(opCtx)->shardRegistry(); + auto const catalogCache = Grid::get(opCtx)->catalogCache(); - auto scopedShardedDb = uassertStatusOK(ScopedShardDatabase::getExisting(txn, nss.db())); - const auto config = scopedShardedDb.db(); + auto dbInfo = uassertStatusOK(catalogCache->getDatabase(opCtx, nss.db())); // Ensure sharding is allowed on the database uassert(ErrorCodes::IllegalOperation, str::stream() << 
"sharding not enabled for db " << nss.db(), - config->isShardingEnabled()); + dbInfo.shardingEnabled()); + + auto routingInfo = uassertStatusOK(catalogCache->getCollectionRoutingInfo(opCtx, nss)); // Ensure that the collection is not sharded already uassert(ErrorCodes::IllegalOperation, str::stream() << "sharding already enabled for collection " << nss.ns(), - !config->isSharded(nss.ns())); + !routingInfo.cm()); // NOTE: We *must* take ownership of the key here - otherwise the shared BSONObj becomes // corrupt as soon as the command ends. @@ -238,7 +237,7 @@ public: bsonExtractTypedField(cmdObj, "collation", BSONType::Object, &collationElement); if (collationStatus.isOK()) { // Ensure that the collation is valid. Currently we only allow the simple collation. - auto collator = CollatorFactoryInterface::get(txn->getServiceContext()) + auto collator = CollatorFactoryInterface::get(opCtx->getServiceContext()) ->makeFromBSON(collationElement.Obj()); if (!collator.getStatus().isOK()) { return appendCommandStatus(result, collator.getStatus()); @@ -279,13 +278,7 @@ public: } // The rest of the checks require a connection to the primary db - const ConnectionString shardConnString = [&]() { - const auto shard = - uassertStatusOK(shardRegistry->getShard(txn, config->getPrimaryId())); - return shard->getConnString(); - }(); - - ScopedDbConnection conn(shardConnString); + ScopedDbConnection conn(routingInfo.primary()->getConnString()); // Retrieve the collection metadata in order to verify that it is legal to shard this // collection. @@ -503,7 +496,7 @@ public: BSONObj collationArg = !defaultCollation.isEmpty() ? 
CollationSpec::kSimpleSpec : BSONObj(); Status status = - clusterCreateIndex(txn, nss.ns(), proposedKey, collationArg, careAboutUnique); + clusterCreateIndex(opCtx, nss.ns(), proposedKey, collationArg, careAboutUnique); if (!status.isOK()) { errmsg = str::stream() << "ensureIndex failed to create index on " << "primary shard: " << status.reason(); @@ -582,7 +575,7 @@ public: audit::logShardCollection(Client::getCurrent(), nss.ns(), proposedKey, careAboutUnique); - uassertStatusOK(catalogClient->shardCollection(txn, + uassertStatusOK(catalogClient->shardCollection(opCtx, nss.ns(), proposedShardKey, defaultCollation, @@ -590,23 +583,27 @@ public: initSplits, std::set<ShardId>{})); - // Make sure the cached metadata for the collection knows that we are now sharded - config->getChunkManager(txn, nss.ns(), true /* reload */); - result << "collectionsharded" << nss.ns(); + // Make sure the cached metadata for the collection knows that we are now sharded + catalogCache->invalidateShardedCollection(nss); + // Only initially move chunks when using a hashed shard key if (isHashedShardKey && isEmpty) { - // Reload the new config info. If we created more than one initial chunk, then - // we need to move them around to balance. - auto chunkManager = config->getChunkManager(txn, nss.ns(), true); - ChunkMap chunkMap = chunkManager->getChunkMap(); + routingInfo = uassertStatusOK(catalogCache->getCollectionRoutingInfo(opCtx, nss)); + uassert(ErrorCodes::ConflictingOperationInProgress, + "Collection was successfully written as sharded but got dropped before it " + "could be evenly distributed", + routingInfo.cm()); + auto chunkManager = routingInfo.cm(); + + const auto chunkMap = chunkManager->chunkMap(); // 2. Move and commit each "big chunk" to a different shard. 
int i = 0; for (ChunkMap::const_iterator c = chunkMap.begin(); c != chunkMap.end(); ++c, ++i) { const ShardId& shardId = shardIds[i % numShards]; - const auto toStatus = shardRegistry->getShard(txn, shardId); + const auto toStatus = shardRegistry->getShard(opCtx, shardId); if (!toStatus.isOK()) { continue; } @@ -627,10 +624,10 @@ public: chunkType.setVersion(chunkManager->getVersion()); Status moveStatus = configsvr_client::moveChunk( - txn, + opCtx, chunkType, to->getId(), - Grid::get(txn)->getBalancerConfiguration()->getMaxChunkSizeBytes(), + Grid::get(opCtx)->getBalancerConfiguration()->getMaxChunkSizeBytes(), MigrationSecondaryThrottleOptions::create( MigrationSecondaryThrottleOptions::kOff), true); @@ -646,7 +643,13 @@ public: } // Reload the config info, after all the migrations - chunkManager = config->getChunkManager(txn, nss.ns(), true); + catalogCache->invalidateShardedCollection(nss); + routingInfo = uassertStatusOK(catalogCache->getCollectionRoutingInfo(opCtx, nss)); + uassert(ErrorCodes::ConflictingOperationInProgress, + "Collection was successfully written as sharded but got dropped before it " + "could be evenly distributed", + routingInfo.cm()); + chunkManager = routingInfo.cm(); // 3. Subdivide the big chunks by splitting at each of the points in "allSplits" // that we haven't already split by. @@ -658,7 +661,7 @@ public: if (i == allSplits.size() || !currentChunk->containsKey(allSplits[i])) { if (!subSplits.empty()) { auto splitStatus = shardutil::splitChunkAtMultiplePoints( - txn, + opCtx, currentChunk->getShardId(), nss, chunkManager->getShardKeyPattern(), @@ -689,10 +692,6 @@ public: subSplits.push_back(splitPoint); } } - - // Proactively refresh the chunk manager. Not really necessary, but this way it's - // immediately up-to-date the next time it's used. 
- config->getChunkManager(txn, nss.ns(), true); } return true; diff --git a/src/mongo/s/commands/cluster_split_cmd.cpp b/src/mongo/s/commands/cluster_split_cmd.cpp index 57e5a54881b..56224d5b911 100644 --- a/src/mongo/s/commands/cluster_split_cmd.cpp +++ b/src/mongo/s/commands/cluster_split_cmd.cpp @@ -37,15 +37,13 @@ #include "mongo/db/auth/action_type.h" #include "mongo/db/auth/authorization_manager.h" #include "mongo/db/auth/authorization_session.h" -#include "mongo/db/client.h" #include "mongo/db/commands.h" #include "mongo/db/field_parser.h" #include "mongo/s/catalog_cache.h" -#include "mongo/s/chunk_manager.h" #include "mongo/s/client/shard_registry.h" +#include "mongo/s/commands/cluster_commands_common.h" #include "mongo/s/grid.h" #include "mongo/s/shard_util.h" -#include "mongo/s/sharding_raii.h" #include "mongo/util/log.h" namespace mongo { @@ -90,20 +88,19 @@ class SplitCollectionCmd : public Command { public: SplitCollectionCmd() : Command("split", false, "split") {} - virtual bool slaveOk() const { + bool slaveOk() const override { return true; } - virtual bool adminOnly() const { + bool adminOnly() const override { return true; } - - virtual bool supportsWriteConcern(const BSONObj& cmd) const override { + bool supportsWriteConcern(const BSONObj& cmd) const override { return false; } - virtual void help(std::stringstream& help) const { + void help(std::stringstream& help) const override { help << " example: - split the shard that contains give key\n" << " { split : 'alleyinsider.blog.posts' , find : { ts : 1 } }\n" << " example: - split the shard that contains the key with this as the middle\n" @@ -111,9 +108,9 @@ public: << " NOTE: this does not move the chunks, it just creates a logical separation."; } - virtual Status checkAuthForCommand(Client* client, - const std::string& dbname, - const BSONObj& cmdObj) { + Status checkAuthForCommand(Client* client, + const std::string& dbname, + const BSONObj& cmdObj) override { if 
(!AuthorizationSession::get(client)->isAuthorizedForActionsOnResource( ResourcePattern::forExactNamespace(NamespaceString(parseNs(dbname, cmdObj))), ActionType::splitChunk)) { @@ -122,19 +119,22 @@ public: return Status::OK(); } - virtual std::string parseNs(const std::string& dbname, const BSONObj& cmdObj) const { + std::string parseNs(const std::string& dbname, const BSONObj& cmdObj) const override { return parseNsFullyQualified(dbname, cmdObj); } - virtual bool run(OperationContext* txn, - const std::string& dbname, - BSONObj& cmdObj, - int options, - std::string& errmsg, - BSONObjBuilder& result) { + bool run(OperationContext* opCtx, + const std::string& dbname, + BSONObj& cmdObj, + int options, + std::string& errmsg, + BSONObjBuilder& result) override { const NamespaceString nss(parseNs(dbname, cmdObj)); - auto scopedCM = uassertStatusOK(ScopedChunkManager::refreshAndGet(txn, nss)); + auto routingInfo = uassertStatusOK( + Grid::get(opCtx)->catalogCache()->getShardedCollectionRoutingInfoWithRefresh(opCtx, + nss)); + const auto cm = routingInfo.cm(); const BSONField<BSONObj> findField("find", BSONObj()); const BSONField<BSONArray> boundsField("bounds", BSONArray()); @@ -190,14 +190,12 @@ public: return false; } - auto const cm = scopedCM.cm(); - std::shared_ptr<Chunk> chunk; if (!find.isEmpty()) { // find BSONObj shardKey = - uassertStatusOK(cm->getShardKeyPattern().extractShardKeyFromQuery(txn, find)); + uassertStatusOK(cm->getShardKeyPattern().extractShardKeyFromQuery(opCtx, find)); if (shardKey.isEmpty()) { errmsg = stream() << "no shard key found in chunk query " << find; return false; @@ -255,7 +253,7 @@ public: // middle of the chunk. const BSONObj splitPoint = !middle.isEmpty() ? 
middle - : selectMedianKey(txn, + : selectMedianKey(opCtx, chunk->getShardId(), nss, cm->getShardKeyPattern(), @@ -267,7 +265,7 @@ public: << redact(splitPoint); uassertStatusOK( - shardutil::splitChunkAtMultiplePoints(txn, + shardutil::splitChunkAtMultiplePoints(opCtx, chunk->getShardId(), nss, cm->getShardKeyPattern(), @@ -275,9 +273,7 @@ public: ChunkRange(chunk->getMin(), chunk->getMax()), {splitPoint})); - // Proactively refresh the chunk manager. Not strictly necessary, but this way it's - // immediately up-to-date the next time it's used. - scopedCM.db()->getChunkManagerIfExists(txn, nss.ns(), true); + Grid::get(opCtx)->catalogCache()->onStaleConfigError(std::move(routingInfo)); return true; } diff --git a/src/mongo/s/commands/cluster_write.cpp b/src/mongo/s/commands/cluster_write.cpp index 3468ca6dcf4..8b8a3f2e644 100644 --- a/src/mongo/s/commands/cluster_write.cpp +++ b/src/mongo/s/commands/cluster_write.cpp @@ -39,14 +39,13 @@ #include "mongo/s/balancer_configuration.h" #include "mongo/s/catalog/sharding_catalog_client.h" #include "mongo/s/catalog/type_collection.h" -#include "mongo/s/chunk.h" +#include "mongo/s/catalog_cache.h" #include "mongo/s/client/shard_registry.h" #include "mongo/s/commands/chunk_manager_targeter.h" #include "mongo/s/commands/dbclient_multi_command.h" #include "mongo/s/config_server_client.h" #include "mongo/s/grid.h" #include "mongo/s/shard_util.h" -#include "mongo/s/sharding_raii.h" #include "mongo/util/log.h" #include "mongo/util/mongoutils/str.h" @@ -66,11 +65,6 @@ void toBatchError(const Status& status, BatchedCommandResponse* response) { dassert(response->isValid(NULL)); } -void reloadChunkManager(OperationContext* txn, const NamespaceString& nss) { - auto config = uassertStatusOK(ScopedShardDatabase::getExisting(txn, nss.db())); - config.db()->getChunkManagerIfExists(txn, nss.ns(), true); -} - /** * Given a maxChunkSize configuration and the number of chunks in a particular sharded collection, * returns an optimal chunk size 
to use in order to achieve a good ratio between number of chunks @@ -104,7 +98,7 @@ uint64_t calculateDesiredChunkSize(uint64_t maxChunkSizeBytes, uint64_t numChunk * ordered list of ascending/descending field names. For example {a : 1, b : -1} is not special, but * {a : "hashed"} is. */ -BSONObj findExtremeKeyForShard(OperationContext* txn, +BSONObj findExtremeKeyForShard(OperationContext* opCtx, const NamespaceString& nss, const ShardId& shardId, const ShardKeyPattern& shardKeyPattern, @@ -130,7 +124,8 @@ BSONObj findExtremeKeyForShard(OperationContext* txn, // Find the extreme key const auto shardConnStr = [&]() { - const auto shard = uassertStatusOK(Grid::get(txn)->shardRegistry()->getShard(txn, shardId)); + const auto shard = + uassertStatusOK(Grid::get(opCtx)->shardRegistry()->getShard(opCtx, shardId)); return shard->getConnString(); }(); @@ -172,31 +167,34 @@ BSONObj findExtremeKeyForShard(OperationContext* txn, /** * Splits the chunks touched based from the targeter stats if needed. 
*/ -void splitIfNeeded(OperationContext* txn, const NamespaceString& nss, const TargeterStats& stats) { - auto scopedCMStatus = ScopedChunkManager::get(txn, nss); - if (!scopedCMStatus.isOK()) { - warning() << "failed to get collection information for " << nss - << " while checking for auto-split" << causedBy(scopedCMStatus.getStatus()); +void splitIfNeeded(OperationContext* opCtx, + const NamespaceString& nss, + const TargeterStats& stats) { + auto routingInfoStatus = Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfo(opCtx, nss); + if (!routingInfoStatus.isOK()) { + log() << "failed to get collection information for " << nss + << " while checking for auto-split" << causedBy(routingInfoStatus.getStatus()); return; } - const auto& scopedCM = scopedCMStatus.getValue(); + auto& routingInfo = routingInfoStatus.getValue(); - if (!scopedCM.cm()) { + if (!routingInfo.cm()) { return; } for (auto it = stats.chunkSizeDelta.cbegin(); it != stats.chunkSizeDelta.cend(); ++it) { std::shared_ptr<Chunk> chunk; try { - chunk = scopedCM.cm()->findIntersectingChunkWithSimpleCollation(it->first); + chunk = routingInfo.cm()->findIntersectingChunkWithSimpleCollation(it->first); } catch (const AssertionException& ex) { warning() << "could not find chunk while checking for auto-split: " << causedBy(redact(ex)); return; } - updateChunkWriteStatsAndSplitIfNeeded(txn, scopedCM.cm().get(), chunk.get(), it->second); + updateChunkWriteStatsAndSplitIfNeeded( + opCtx, routingInfo.cm().get(), chunk.get(), it->second); } } @@ -205,7 +203,7 @@ void splitIfNeeded(OperationContext* txn, const NamespaceString& nss, const Targ ClusterWriter::ClusterWriter(bool autoSplit, int timeoutMillis) : _autoSplit(autoSplit), _timeoutMillis(timeoutMillis) {} -void ClusterWriter::write(OperationContext* txn, +void ClusterWriter::write(OperationContext* opCtx, const BatchedCommandRequest& origRequest, BatchedCommandResponse* response) { // Add _ids to insert request if req'd @@ -291,14 +289,14 @@ void 
ClusterWriter::write(OperationContext* txn, request = requestWithWriteConcern.get(); } - Grid::get(txn)->catalogClient(txn)->writeConfigServerDirect(txn, *request, response); + Grid::get(opCtx)->catalogClient(opCtx)->writeConfigServerDirect(opCtx, *request, response); } else { TargeterStats targeterStats; { ChunkManagerTargeter targeter(request->getTargetingNSS(), &targeterStats); - Status targetInitStatus = targeter.init(txn); + Status targetInitStatus = targeter.init(opCtx); if (!targetInitStatus.isOK()) { toBatchError(Status(targetInitStatus.code(), str::stream() @@ -313,11 +311,11 @@ void ClusterWriter::write(OperationContext* txn, DBClientMultiCommand dispatcher; BatchWriteExec exec(&targeter, &dispatcher); - exec.executeBatch(txn, *request, response, &_stats); + exec.executeBatch(opCtx, *request, response, &_stats); } if (_autoSplit) { - splitIfNeeded(txn, request->getNS(), targeterStats); + splitIfNeeded(opCtx, request->getNS(), targeterStats); } } } @@ -326,7 +324,7 @@ const BatchWriteExecStats& ClusterWriter::getStats() { return _stats; } -void updateChunkWriteStatsAndSplitIfNeeded(OperationContext* txn, +void updateChunkWriteStatsAndSplitIfNeeded(OperationContext* opCtx, ChunkManager* manager, Chunk* chunk, long dataWritten) { @@ -334,7 +332,7 @@ void updateChunkWriteStatsAndSplitIfNeeded(OperationContext* txn, // bubbled up on the client connection doing a write. 
LastError::Disabled d(&LastError::get(cc())); - const auto balancerConfig = Grid::get(txn)->getBalancerConfiguration(); + const auto balancerConfig = Grid::get(opCtx)->getBalancerConfiguration(); const bool minIsInf = (0 == manager->getShardKeyPattern().getKeyPattern().globalMin().woCompare(chunk->getMin())); @@ -370,7 +368,7 @@ void updateChunkWriteStatsAndSplitIfNeeded(OperationContext* txn, try { // Ensure we have the most up-to-date balancer configuration - uassertStatusOK(balancerConfig->refreshAndCheck(txn)); + uassertStatusOK(balancerConfig->refreshAndCheck(opCtx)); if (!balancerConfig->getShouldAutoSplit()) { return; @@ -393,7 +391,7 @@ void updateChunkWriteStatsAndSplitIfNeeded(OperationContext* txn, }(); auto splitPoints = - uassertStatusOK(shardutil::selectChunkSplitPoints(txn, + uassertStatusOK(shardutil::selectChunkSplitPoints(opCtx, chunk->getShardId(), nss, manager->getShardKeyPattern(), @@ -425,13 +423,13 @@ void updateChunkWriteStatsAndSplitIfNeeded(OperationContext* txn, if (KeyPattern::isOrderedKeyPattern(manager->getShardKeyPattern().toBSON())) { if (minIsInf) { BSONObj key = findExtremeKeyForShard( - txn, nss, chunk->getShardId(), manager->getShardKeyPattern(), true); + opCtx, nss, chunk->getShardId(), manager->getShardKeyPattern(), true); if (!key.isEmpty()) { splitPoints.front() = key.getOwned(); } } else if (maxIsInf) { BSONObj key = findExtremeKeyForShard( - txn, nss, chunk->getShardId(), manager->getShardKeyPattern(), false); + opCtx, nss, chunk->getShardId(), manager->getShardKeyPattern(), false); if (!key.isEmpty()) { splitPoints.back() = key.getOwned(); } @@ -439,7 +437,7 @@ void updateChunkWriteStatsAndSplitIfNeeded(OperationContext* txn, } const auto suggestedMigrateChunk = - uassertStatusOK(shardutil::splitChunkAtMultiplePoints(txn, + uassertStatusOK(shardutil::splitChunkAtMultiplePoints(opCtx, chunk->getShardId(), nss, manager->getShardKeyPattern(), @@ -454,7 +452,7 @@ void updateChunkWriteStatsAndSplitIfNeeded(OperationContext* 
txn, return false; auto collStatus = - Grid::get(txn)->catalogClient(txn)->getCollection(txn, manager->getns()); + Grid::get(opCtx)->catalogClient(opCtx)->getCollection(opCtx, manager->getns()); if (!collStatus.isOK()) { log() << "Auto-split for " << nss << " failed to load collection metadata" << causedBy(redact(collStatus.getStatus())); @@ -469,21 +467,22 @@ void updateChunkWriteStatsAndSplitIfNeeded(OperationContext* txn, << (suggestedMigrateChunk ? "" : (std::string) " (migrate suggested" + (shouldBalance ? ")" : ", but no migrations allowed)")); + // Reload the chunk manager after the split + auto routingInfo = uassertStatusOK( + Grid::get(opCtx)->catalogCache()->getShardedCollectionRoutingInfoWithRefresh(opCtx, + nss)); + if (!shouldBalance || !suggestedMigrateChunk) { - reloadChunkManager(txn, nss); return; } // Top chunk optimization - try to move the top chunk out of this shard to prevent the hot - // spot - // from staying on a single shard. This is based on the assumption that succeeding inserts - // will - // fall on the top chunk. + // spot from staying on a single shard. This is based on the assumption that succeeding + // inserts will fall on the top chunk. 
// We need to use the latest chunk manager (after the split) in order to have the most // up-to-date view of the chunk we are about to move - auto scopedCM = uassertStatusOK(ScopedChunkManager::refreshAndGet(txn, nss)); - auto suggestedChunk = scopedCM.cm()->findIntersectingChunkWithSimpleCollation( + auto suggestedChunk = routingInfo.cm()->findIntersectingChunkWithSimpleCollation( suggestedMigrateChunk->getMin()); ChunkType chunkToMove; @@ -493,9 +492,10 @@ void updateChunkWriteStatsAndSplitIfNeeded(OperationContext* txn, chunkToMove.setMax(suggestedChunk->getMax()); chunkToMove.setVersion(suggestedChunk->getLastmod()); - uassertStatusOK(configsvr_client::rebalanceChunk(txn, chunkToMove)); + uassertStatusOK(configsvr_client::rebalanceChunk(opCtx, chunkToMove)); - reloadChunkManager(txn, nss); + // Ensure the collection gets reloaded because of the move + Grid::get(opCtx)->catalogCache()->invalidateShardedCollection(nss); } catch (const DBException& ex) { chunk->randomizeBytesWritten(); diff --git a/src/mongo/s/commands/commands_public.cpp b/src/mongo/s/commands/commands_public.cpp index 7f9209774d2..976c5ce758d 100644 --- a/src/mongo/s/commands/commands_public.cpp +++ b/src/mongo/s/commands/commands_public.cpp @@ -54,7 +54,6 @@ #include "mongo/rpc/get_status_from_command_result.h" #include "mongo/s/catalog/sharding_catalog_client.h" #include "mongo/s/catalog_cache.h" -#include "mongo/s/chunk_manager.h" #include "mongo/s/client/shard_connection.h" #include "mongo/s/client/shard_registry.h" #include "mongo/s/cluster_last_error_info.h" @@ -62,10 +61,8 @@ #include "mongo/s/commands/cluster_explain.h" #include "mongo/s/commands/run_on_all_shards_cmd.h" #include "mongo/s/commands/sharded_command_processing.h" -#include "mongo/s/config.h" #include "mongo/s/grid.h" #include "mongo/s/query/store_possible_cursor.h" -#include "mongo/s/sharding_raii.h" #include "mongo/s/stale_exception.h" #include "mongo/scripting/engine.h" #include "mongo/util/log.h" @@ -86,20 +83,20 @@ 
using std::vector; namespace { -bool cursorCommandPassthrough(OperationContext* txn, - shared_ptr<DBConfig> conf, +bool cursorCommandPassthrough(OperationContext* opCtx, + StringData dbName, + const ShardId& shardId, const BSONObj& cmdObj, const NamespaceString& nss, int options, BSONObjBuilder* out) { - const auto shardStatus = Grid::get(txn)->shardRegistry()->getShard(txn, conf->getPrimaryId()); + const auto shardStatus = Grid::get(opCtx)->shardRegistry()->getShard(opCtx, shardId); if (!shardStatus.isOK()) { - invariant(shardStatus.getStatus() == ErrorCodes::ShardNotFound); return Command::appendCommandStatus(*out, shardStatus.getStatus()); } const auto shard = shardStatus.getValue(); ScopedDbConnection conn(shard->getConnString()); - auto cursor = conn->query(str::stream() << conf->name() << ".$cmd", + auto cursor = conn->query(str::stream() << dbName << ".$cmd", cmdObj, -1, // nToReturn 0, // nToSkip @@ -123,8 +120,8 @@ bool cursorCommandPassthrough(OperationContext* txn, storePossibleCursor(HostAndPort(cursor->originalHost()), response, nss, - Grid::get(txn)->getExecutorPool()->getArbitraryExecutor(), - Grid::get(txn)->getCursorManager()); + Grid::get(opCtx)->getExecutorPool()->getArbitraryExecutor(), + Grid::get(opCtx)->getCursorManager()); if (!transformedResponse.isOK()) { return Command::appendCommandStatus(*out, transformedResponse.getStatus()); } @@ -154,11 +151,13 @@ StatusWith<BSONObj> getCollation(const BSONObj& cmdObj) { } class PublicGridCommand : public Command { -public: +protected: PublicGridCommand(const char* n, const char* oldname = NULL) : Command(n, false, oldname) {} + virtual bool slaveOk() const { return true; } + virtual bool adminOnly() const { return false; } @@ -169,41 +168,29 @@ public: return false; } - // all grid commands are designed not to lock - -protected: - bool passthrough(OperationContext* txn, - DBConfig* conf, - const BSONObj& cmdObj, - BSONObjBuilder& result) { - return _passthrough(txn, conf->name(), conf, cmdObj, 0, 
result); - } - - bool adminPassthrough(OperationContext* txn, - DBConfig* conf, + bool adminPassthrough(OperationContext* opCtx, + const ShardId& shardId, const BSONObj& cmdObj, BSONObjBuilder& result) { - return _passthrough(txn, "admin", conf, cmdObj, 0, result); + return passthrough(opCtx, "admin", shardId, cmdObj, result); } - bool passthrough(OperationContext* txn, - DBConfig* conf, + bool passthrough(OperationContext* opCtx, + const std::string& db, + const ShardId& shardId, const BSONObj& cmdObj, - int options, BSONObjBuilder& result) { - return _passthrough(txn, conf->name(), conf, cmdObj, options, result); + return passthrough(opCtx, db, shardId, cmdObj, 0, result); } -private: - bool _passthrough(OperationContext* txn, - const string& db, - DBConfig* conf, - const BSONObj& cmdObj, - int options, - BSONObjBuilder& result) { - const auto shardStatus = - Grid::get(txn)->shardRegistry()->getShard(txn, conf->getPrimaryId()); - const auto shard = uassertStatusOK(shardStatus); + bool passthrough(OperationContext* opCtx, + const std::string& db, + const ShardId& shardId, + const BSONObj& cmdObj, + int options, + BSONObjBuilder& result) { + const auto shard = + uassertStatusOK(Grid::get(opCtx)->shardRegistry()->getShard(opCtx, shardId)); ShardConnection conn(shard->getConnString(), ""); @@ -222,53 +209,50 @@ private: }; class AllShardsCollectionCommand : public RunOnAllShardsCommand { -public: +protected: AllShardsCollectionCommand(const char* n, const char* oldname = NULL, bool useShardConn = false, bool implicitCreateDb = false) : RunOnAllShardsCommand(n, oldname, useShardConn, implicitCreateDb) {} - virtual void getShardIds(OperationContext* txn, - const string& dbName, - BSONObj& cmdObj, - vector<ShardId>& shardIds) { - const string fullns = dbName + '.' 
+ cmdObj.firstElement().valuestrsafe(); - - auto status = Grid::get(txn)->catalogCache()->getDatabase(txn, dbName); - uassertStatusOK(status.getStatus()); - - shared_ptr<DBConfig> conf = status.getValue(); - - if (!conf->isSharded(fullns)) { - shardIds.push_back(conf->getPrimaryId()); + void getShardIds(OperationContext* opCtx, + const string& dbName, + BSONObj& cmdObj, + vector<ShardId>& shardIds) override { + const NamespaceString nss(parseNsCollectionRequired(dbName, cmdObj)); + + const auto routingInfo = + uassertStatusOK(Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfo(opCtx, nss)); + if (routingInfo.cm()) { + // If it's a sharded collection, send it to all shards + Grid::get(opCtx)->shardRegistry()->getAllShardIds(&shardIds); } else { - Grid::get(txn)->shardRegistry()->getAllShardIds(&shardIds); + // Otherwise just send it to the primary shard for the database + shardIds.push_back(routingInfo.primaryId()); } } }; class NotAllowedOnShardedCollectionCmd : public PublicGridCommand { -public: +protected: NotAllowedOnShardedCollectionCmd(const char* n) : PublicGridCommand(n) {} - virtual bool run(OperationContext* txn, - const string& dbName, - BSONObj& cmdObj, - int options, - string& errmsg, - BSONObjBuilder& result) { - const string fullns = parseNs(dbName, cmdObj); + bool run(OperationContext* opCtx, + const string& dbName, + BSONObj& cmdObj, + int options, + string& errmsg, + BSONObjBuilder& result) override { + const NamespaceString nss(parseNs(dbName, cmdObj)); - auto conf = uassertStatusOK(Grid::get(txn)->catalogCache()->getDatabase(txn, dbName)); - if (!conf->isSharded(fullns)) { - return passthrough(txn, conf.get(), cmdObj, options, result); - } + const auto routingInfo = + uassertStatusOK(Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfo(opCtx, nss)); + uassert(ErrorCodes::IllegalOperation, + str::stream() << "can't do command: " << getName() << " on sharded collection", + !routingInfo.cm()); - return appendCommandStatus( - result, - 
Status(ErrorCodes::IllegalOperation, - str::stream() << "can't do command: " << getName() << " on sharded collection")); + return passthrough(opCtx, dbName, routingInfo.primaryId(), cmdObj, options, result); } }; @@ -406,6 +390,7 @@ public: class ReIndexCmd : public AllShardsCollectionCommand { public: ReIndexCmd() : AllShardsCollectionCommand("reIndex") {} + virtual void addRequiredPrivileges(const std::string& dbname, const BSONObj& cmdObj, std::vector<Privilege>* out) { @@ -417,6 +402,7 @@ public: virtual bool supportsWriteConcern(const BSONObj& cmd) const override { return true; } + } reIndexCmd; class CollectionModCmd : public AllShardsCollectionCommand { @@ -433,12 +419,13 @@ public: virtual bool supportsWriteConcern(const BSONObj& cmd) const override { return true; } -} collectionModCmd; +} collectionModCmd; class ValidateCmd : public PublicGridCommand { public: ValidateCmd() : PublicGridCommand("validate") {} + virtual void addRequiredPrivileges(const std::string& dbname, const BSONObj& cmdObj, std::vector<Privilege>* out) { @@ -451,26 +438,32 @@ public: return false; } - bool run(OperationContext* txn, + bool run(OperationContext* opCtx, const string& dbName, BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& output) { - const NamespaceString nss = parseNsCollectionRequired(dbName, cmdObj); + const NamespaceString nss(parseNsCollectionRequired(dbName, cmdObj)); - auto conf = uassertStatusOK(Grid::get(txn)->catalogCache()->getDatabase(txn, dbName)); - if (!conf->isSharded(nss.ns())) { - return passthrough(txn, conf.get(), cmdObj, output); + auto routingInfo = + uassertStatusOK(Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfo(opCtx, nss)); + if (!routingInfo.cm()) { + return passthrough(opCtx, dbName, routingInfo.primaryId(), cmdObj, output); } - shared_ptr<ChunkManager> cm = conf->getChunkManager(txn, nss.ns()); - massert(40051, "chunk manager should not be null", cm); + const auto cm = routingInfo.cm(); vector<Strategy::CommandResult> 
results; const BSONObj query; - Strategy::commandOp( - txn, dbName, cmdObj, options, cm->getns(), query, CollationSpec::kSimpleSpec, &results); + Strategy::commandOp(opCtx, + dbName, + cmdObj, + options, + cm->getns(), + query, + CollationSpec::kSimpleSpec, + &results); BSONObjBuilder rawResBuilder(output.subobjStart("raw")); bool isValid = true; @@ -505,33 +498,35 @@ public: } return true; } + } validateCmd; class CreateCmd : public PublicGridCommand { public: CreateCmd() : PublicGridCommand("create") {} - virtual Status checkAuthForCommand(Client* client, - const std::string& dbname, - const BSONObj& cmdObj) { - NamespaceString nss(parseNs(dbname, cmdObj)); + + Status checkAuthForCommand(Client* client, + const std::string& dbname, + const BSONObj& cmdObj) override { + const NamespaceString nss(parseNsCollectionRequired(dbname, cmdObj)); return AuthorizationSession::get(client)->checkAuthForCreate(nss, cmdObj); } - virtual bool supportsWriteConcern(const BSONObj& cmd) const override { + + bool supportsWriteConcern(const BSONObj& cmd) const override { return true; } - bool run(OperationContext* txn, + + bool run(OperationContext* opCtx, const string& dbName, BSONObj& cmdObj, int, string& errmsg, - BSONObjBuilder& result) { - auto dbStatus = ScopedShardDatabase::getOrCreate(txn, dbName); - if (!dbStatus.isOK()) { - return appendCommandStatus(result, dbStatus.getStatus()); - } + BSONObjBuilder& result) override { + uassertStatusOK(createShardDatabase(opCtx, dbName)); - auto scopedDb = std::move(dbStatus.getValue()); - return passthrough(txn, scopedDb.db(), cmdObj, result); + const auto dbInfo = + uassertStatusOK(Grid::get(opCtx)->catalogCache()->getDatabase(opCtx, dbName)); + return passthrough(opCtx, dbName, dbInfo.primaryId(), cmdObj, result); } } createCmd; @@ -539,44 +534,60 @@ public: class RenameCollectionCmd : public PublicGridCommand { public: RenameCollectionCmd() : PublicGridCommand("renameCollection") {} + virtual Status checkAuthForCommand(Client* 
client, const std::string& dbname, const BSONObj& cmdObj) { return rename_collection::checkAuthForRenameCollectionCommand(client, dbname, cmdObj); } + virtual bool adminOnly() const { return true; } + virtual bool supportsWriteConcern(const BSONObj& cmd) const override { return true; } - bool run(OperationContext* txn, + + bool run(OperationContext* opCtx, const string& dbName, BSONObj& cmdObj, - int, + int options, string& errmsg, - BSONObjBuilder& result) { - const string fullnsFrom = cmdObj.firstElement().valuestrsafe(); - const string dbNameFrom = nsToDatabase(fullnsFrom); - auto confFrom = - uassertStatusOK(Grid::get(txn)->catalogCache()->getDatabase(txn, dbNameFrom)); + BSONObjBuilder& result) override { + const auto fullNsFromElt = cmdObj.firstElement(); + uassert(ErrorCodes::InvalidNamespace, + "'renameCollection' must be of type String", + fullNsFromElt.type() == BSONType::String); + const NamespaceString fullnsFrom(fullNsFromElt.valueStringData()); + uassert(ErrorCodes::InvalidNamespace, + str::stream() << "Invalid source namespace: " << fullnsFrom.ns(), + fullnsFrom.isValid()); - const string fullnsTo = cmdObj["to"].valuestrsafe(); - const string dbNameTo = nsToDatabase(fullnsTo); - auto confTo = uassertStatusOK(Grid::get(txn)->catalogCache()->getDatabase(txn, dbNameTo)); + const auto fullnsToElt = cmdObj["to"]; + uassert(ErrorCodes::InvalidNamespace, + "'to' must be of type String", + fullnsToElt.type() == BSONType::String); + const NamespaceString fullnsTo(fullnsToElt.valueStringData()); + uassert(ErrorCodes::InvalidNamespace, + str::stream() << "Invalid target namespace: " << fullnsTo.ns(), + fullnsTo.isValid()); - uassert(13138, "You can't rename a sharded collection", !confFrom->isSharded(fullnsFrom)); - uassert(13139, "You can't rename to a sharded collection", !confTo->isSharded(fullnsTo)); + const auto fromRoutingInfo = uassertStatusOK( + Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfo(opCtx, fullnsFrom)); + uassert(13138, "You can't 
rename a sharded collection", !fromRoutingInfo.cm()); - auto shardTo = confTo->getPrimaryId(); - auto shardFrom = confFrom->getPrimaryId(); + const auto toRoutingInfo = uassertStatusOK( + Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfo(opCtx, fullnsTo)); + uassert(13139, "You can't rename to a sharded collection", !toRoutingInfo.cm()); uassert(13137, "Source and destination collections must be on same shard", - shardFrom == shardTo); + fromRoutingInfo.primaryId() == toRoutingInfo.primaryId()); - return adminPassthrough(txn, confFrom.get(), cmdObj, result); + return adminPassthrough(opCtx, fromRoutingInfo.primaryId(), cmdObj, result); } + } renameCollectionCmd; class CopyDBCmd : public PublicGridCommand { @@ -597,7 +608,7 @@ public: return true; } - bool run(OperationContext* txn, + bool run(OperationContext* opCtx, const string& dbName, BSONObj& cmdObj, int options, @@ -612,14 +623,14 @@ public: "invalid todb argument", NamespaceString::validDBName(todb, NamespaceString::DollarInDbNameBehavior::Allow)); - auto scopedToDb = uassertStatusOK(ScopedShardDatabase::getOrCreate(txn, todb)); + auto toDbInfo = uassertStatusOK(createShardDatabase(opCtx, todb)); uassert(ErrorCodes::IllegalOperation, "Cannot copy to a sharded database", - !scopedToDb.db()->isShardingEnabled()); + !toDbInfo.shardingEnabled()); - const string fromhost = cmdObj.getStringField("fromhost"); + const std::string fromhost = cmdObj.getStringField("fromhost"); if (!fromhost.empty()) { - return adminPassthrough(txn, scopedToDb.db(), cmdObj, result); + return adminPassthrough(opCtx, toDbInfo.primaryId(), cmdObj, result); } const auto fromDbElt = cmdObj["fromdb"]; @@ -632,10 +643,10 @@ public: "invalid fromdb argument", NamespaceString::validDBName(fromdb, NamespaceString::DollarInDbNameBehavior::Allow)); - auto scopedFromDb = uassertStatusOK(ScopedShardDatabase::getExisting(txn, fromdb)); + auto fromDbInfo = uassertStatusOK(createShardDatabase(opCtx, fromdb)); 
uassert(ErrorCodes::IllegalOperation, "Cannot copy from a sharded database", - !scopedFromDb.db()->isShardingEnabled()); + !fromDbInfo.shardingEnabled()); BSONObjBuilder b; BSONForEach(e, cmdObj) { @@ -646,11 +657,11 @@ public: { const auto shard = uassertStatusOK( - Grid::get(txn)->shardRegistry()->getShard(txn, scopedFromDb.db()->getPrimaryId())); + Grid::get(opCtx)->shardRegistry()->getShard(opCtx, fromDbInfo.primaryId())); b.append("fromhost", shard->getConnString().toString()); } - return adminPassthrough(txn, scopedToDb.db(), b.obj(), result); + return adminPassthrough(opCtx, toDbInfo.primaryId(), b.obj(), result); } } clusterCopyDBCmd; @@ -658,38 +669,38 @@ public: class CollectionStats : public PublicGridCommand { public: CollectionStats() : PublicGridCommand("collStats", "collstats") {} - virtual void addRequiredPrivileges(const std::string& dbname, - const BSONObj& cmdObj, - std::vector<Privilege>* out) { + + void addRequiredPrivileges(const std::string& dbname, + const BSONObj& cmdObj, + std::vector<Privilege>* out) override { ActionSet actions; actions.addAction(ActionType::collStats); out->push_back(Privilege(parseResourcePattern(dbname, cmdObj), actions)); } - virtual bool supportsWriteConcern(const BSONObj& cmd) const override { + bool supportsWriteConcern(const BSONObj& cmd) const override { return false; } - bool run(OperationContext* txn, + bool run(OperationContext* opCtx, const string& dbName, BSONObj& cmdObj, - int, + int options, string& errmsg, - BSONObjBuilder& result) { - const string fullns = parseNs(dbName, cmdObj); + BSONObjBuilder& result) override { + const NamespaceString nss(parseNsCollectionRequired(dbName, cmdObj)); - auto conf = uassertStatusOK(Grid::get(txn)->catalogCache()->getDatabase(txn, dbName)); - if (!conf->isSharded(fullns)) { + auto routingInfo = + uassertStatusOK(Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfo(opCtx, nss)); + if (!routingInfo.cm()) { result.appendBool("sharded", false); - 
result.append("primary", conf->getPrimaryId().toString()); - - return passthrough(txn, conf.get(), cmdObj, result); + result.append("primary", routingInfo.primaryId().toString()); + return passthrough(opCtx, dbName, routingInfo.primaryId(), cmdObj, result); } - result.appendBool("sharded", true); + const auto cm = routingInfo.cm(); - shared_ptr<ChunkManager> cm = conf->getChunkManager(txn, fullns); - massert(12594, "how could chunk manager be null!", cm); + result.appendBool("sharded", true); BSONObjBuilder shardStats; map<string, long long> counts; @@ -703,7 +714,7 @@ public: set<ShardId> shardIds; cm->getAllShardIds(&shardIds); for (const ShardId& shardId : shardIds) { - const auto shardStatus = Grid::get(txn)->shardRegistry()->getShard(txn, shardId); + const auto shardStatus = Grid::get(opCtx)->shardRegistry()->getShard(opCtx, shardId); if (!shardStatus.isOK()) { invariant(shardStatus.getStatus() == ErrorCodes::ShardNotFound); continue; @@ -802,7 +813,7 @@ public: unscaledCollSize += shardAvgObjSize * shardObjCount; } - result.append("ns", fullns); + result.append("ns", nss.ns()); for (map<string, long long>::iterator i = counts.begin(); i != counts.end(); ++i) result.appendNumber(i->first, i->second); @@ -830,40 +841,44 @@ public: return true; } + } collectionStatsCmd; class DataSizeCmd : public PublicGridCommand { public: DataSizeCmd() : PublicGridCommand("dataSize", "datasize") {} - virtual string parseNs(const string& dbname, const BSONObj& cmdObj) const override { + + std::string parseNs(const std::string& dbname, const BSONObj& cmdObj) const override { return parseNsFullyQualified(dbname, cmdObj); } - virtual void addRequiredPrivileges(const std::string& dbname, - const BSONObj& cmdObj, - std::vector<Privilege>* out) { + + void addRequiredPrivileges(const std::string& dbname, + const BSONObj& cmdObj, + std::vector<Privilege>* out) override { ActionSet actions; actions.addAction(ActionType::find); out->push_back(Privilege(parseResourcePattern(dbname, 
cmdObj), actions)); } - virtual bool supportsWriteConcern(const BSONObj& cmd) const override { + + bool supportsWriteConcern(const BSONObj& cmd) const override { return false; } - bool run(OperationContext* txn, + + bool run(OperationContext* opCtx, const string& dbName, BSONObj& cmdObj, - int, + int options, string& errmsg, - BSONObjBuilder& result) { - const string fullns = parseNs(dbName, cmdObj); - const string nsDBName = nsToDatabase(fullns); + BSONObjBuilder& result) override { + const NamespaceString nss(parseNs(dbName, cmdObj)); - auto conf = uassertStatusOK(Grid::get(txn)->catalogCache()->getDatabase(txn, nsDBName)); - if (!conf->isSharded(fullns)) { - return passthrough(txn, conf.get(), cmdObj, result); + auto routingInfo = + uassertStatusOK(Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfo(opCtx, nss)); + if (!routingInfo.cm()) { + return passthrough(opCtx, dbName, routingInfo.primaryId(), cmdObj, result); } - shared_ptr<ChunkManager> cm = conf->getChunkManager(txn, fullns); - massert(13407, "how could chunk manager be null!", cm); + const auto cm = routingInfo.cm(); BSONObj min = cmdObj.getObjectField("min"); BSONObj max = cmdObj.getObjectField("max"); @@ -892,15 +907,14 @@ public: cm->getShardIdsForRange(min, max, &shardIds); for (const ShardId& shardId : shardIds) { - const auto shardStatus = Grid::get(txn)->shardRegistry()->getShard(txn, shardId); + const auto shardStatus = Grid::get(opCtx)->shardRegistry()->getShard(opCtx, shardId); if (!shardStatus.isOK()) { - invariant(shardStatus.getStatus() == ErrorCodes::ShardNotFound); continue; } ScopedDbConnection conn(shardStatus.getValue()->getConnString()); BSONObj res; - bool ok = conn->runCommand(conf->name(), cmdObj, res); + bool ok = conn->runCommand(dbName, cmdObj, res); conn.done(); if (!ok) { @@ -924,49 +938,63 @@ public: class ConvertToCappedCmd : public NotAllowedOnShardedCollectionCmd { public: ConvertToCappedCmd() : NotAllowedOnShardedCollectionCmd("convertToCapped") {} - virtual void 
addRequiredPrivileges(const std::string& dbname, - const BSONObj& cmdObj, - std::vector<Privilege>* out) { + + void addRequiredPrivileges(const std::string& dbname, + const BSONObj& cmdObj, + std::vector<Privilege>* out) override { ActionSet actions; actions.addAction(ActionType::convertToCapped); out->push_back(Privilege(parseResourcePattern(dbname, cmdObj), actions)); } - virtual bool supportsWriteConcern(const BSONObj& cmd) const override { + bool supportsWriteConcern(const BSONObj& cmd) const override { return true; } + std::string parseNs(const std::string& dbname, const BSONObj& cmdObj) const override { + return parseNsCollectionRequired(dbname, cmdObj).ns(); + } + } convertToCappedCmd; class GroupCmd : public NotAllowedOnShardedCollectionCmd { public: GroupCmd() : NotAllowedOnShardedCollectionCmd("group") {} - virtual void addRequiredPrivileges(const std::string& dbname, - const BSONObj& cmdObj, - std::vector<Privilege>* out) { + + void addRequiredPrivileges(const std::string& dbname, + const BSONObj& cmdObj, + std::vector<Privilege>* out) override { ActionSet actions; actions.addAction(ActionType::find); out->push_back(Privilege(parseResourcePattern(dbname, cmdObj), actions)); } - virtual bool supportsWriteConcern(const BSONObj& cmd) const override { + bool supportsWriteConcern(const BSONObj& cmd) const override { return false; } - virtual bool passOptions() const { + bool passOptions() const override { return true; } - virtual std::string parseNs(const std::string& dbName, const BSONObj& cmdObj) const { - return dbName + "." 
+ cmdObj.firstElement().embeddedObjectUserCheck()["ns"].valuestrsafe(); + std::string parseNs(const std::string& dbname, const BSONObj& cmdObj) const override { + const auto nsElt = cmdObj.firstElement().embeddedObjectUserCheck()["ns"]; + uassert(ErrorCodes::InvalidNamespace, + "'ns' must be of type String", + nsElt.type() == BSONType::String); + const NamespaceString nss(dbname, nsElt.valueStringData()); + uassert(ErrorCodes::InvalidNamespace, + str::stream() << "Invalid namespace: " << nss.ns(), + nss.isValid()); + return nss.ns(); } - Status explain(OperationContext* txn, + Status explain(OperationContext* opCtx, const std::string& dbname, const BSONObj& cmdObj, ExplainCommon::Verbosity verbosity, const rpc::ServerSelectionMetadata& serverSelectionMetadata, - BSONObjBuilder* out) const { + BSONObjBuilder* out) const override { // We will time how long it takes to run the commands on the shards. Timer timer; @@ -982,36 +1010,17 @@ public: const NamespaceString nss(parseNs(dbname, cmdObj)); - // Note that this implementation will not handle targeting retries and fails when the - // sharding metadata is too stale - auto status = Grid::get(txn)->catalogCache()->getDatabase(txn, nss.db()); - if (!status.isOK()) { - return Status(status.getStatus().code(), - str::stream() << "Passthrough command failed: " << command.toString() - << " on ns " - << nss.ns() - << ". Caused by " - << causedBy(status.getStatus())); - } - - shared_ptr<DBConfig> conf = status.getValue(); - if (conf->isSharded(nss.ns())) { - return Status(ErrorCodes::IllegalOperation, - str::stream() << "Passthrough command failed: " << command.toString() - << " on ns " - << nss.ns() - << ". 
Cannot run on sharded namespace."); - } - - const auto primaryShardStatus = - Grid::get(txn)->shardRegistry()->getShard(txn, conf->getPrimaryId()); - if (!primaryShardStatus.isOK()) { - return primaryShardStatus.getStatus(); - } + auto routingInfo = + uassertStatusOK(Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfo(opCtx, nss)); + uassert(ErrorCodes::IllegalOperation, + str::stream() << "Passthrough command failed: " << command.toString() << " on ns " + << nss.ns() + << ". Cannot run on sharded namespace.", + !routingInfo.cm()); BSONObj shardResult; try { - ShardConnection conn(primaryShardStatus.getValue()->getConnString(), ""); + ShardConnection conn(routingInfo.primary()->getConnString(), ""); // TODO: this can throw a stale config when mongos is not up-to-date -- fix. if (!conn->runCommand(nss.db().toString(), command, shardResult, options)) { @@ -1023,6 +1032,7 @@ public: << "; result: " << shardResult); } + conn.done(); } catch (const DBException& ex) { return ex.toStatus(); @@ -1030,12 +1040,12 @@ public: // Fill out the command result. 
Strategy::CommandResult cmdResult; - cmdResult.shardTargetId = conf->getPrimaryId(); + cmdResult.shardTargetId = routingInfo.primaryId(); cmdResult.result = shardResult; - cmdResult.target = primaryShardStatus.getValue()->getConnString(); + cmdResult.target = routingInfo.primary()->getConnString(); return ClusterExplain::buildExplainResult( - txn, {cmdResult}, ClusterExplain::kSingleShard, timer.millis(), out); + opCtx, {cmdResult}, ClusterExplain::kSingleShard, timer.millis(), out); } } groupCmd; @@ -1043,15 +1053,18 @@ public: class SplitVectorCmd : public NotAllowedOnShardedCollectionCmd { public: SplitVectorCmd() : NotAllowedOnShardedCollectionCmd("splitVector") {} - virtual bool passOptions() const { + + bool passOptions() const override { return true; } - virtual bool supportsWriteConcern(const BSONObj& cmd) const override { + + bool supportsWriteConcern(const BSONObj& cmd) const override { return false; } - virtual Status checkAuthForCommand(Client* client, - const std::string& dbname, - const BSONObj& cmdObj) { + + Status checkAuthForCommand(Client* client, + const std::string& dbname, + const BSONObj& cmdObj) override { if (!AuthorizationSession::get(client)->isAuthorizedForActionsOnResource( ResourcePattern::forExactNamespace(NamespaceString(parseNs(dbname, cmdObj))), ActionType::splitVector)) { @@ -1059,62 +1072,64 @@ public: } return Status::OK(); } - virtual bool run(OperationContext* txn, - const string& dbName, - BSONObj& cmdObj, - int options, - string& errmsg, - BSONObjBuilder& result) { - string x = parseNs(dbName, cmdObj); - if (!str::startsWith(x, dbName)) { - errmsg = str::stream() << "doing a splitVector across dbs isn't supported via mongos"; - return false; - } - return NotAllowedOnShardedCollectionCmd::run(txn, dbName, cmdObj, options, errmsg, result); - } - virtual std::string parseNs(const string& dbname, const BSONObj& cmdObj) const { + + std::string parseNs(const string& dbname, const BSONObj& cmdObj) const override { return 
parseNsFullyQualified(dbname, cmdObj); } + bool run(OperationContext* opCtx, + const string& dbName, + BSONObj& cmdObj, + int options, + string& errmsg, + BSONObjBuilder& result) override { + const std::string ns = parseNs(dbName, cmdObj); + uassert(ErrorCodes::IllegalOperation, + "Performing splitVector across dbs isn't supported via mongos", + str::startsWith(ns, dbName)); + + return NotAllowedOnShardedCollectionCmd::run( + opCtx, dbName, cmdObj, options, errmsg, result); + } + } splitVectorCmd; class DistinctCmd : public PublicGridCommand { public: DistinctCmd() : PublicGridCommand("distinct") {} - virtual void help(stringstream& help) const { + + void help(stringstream& help) const override { help << "{ distinct : 'collection name' , key : 'a.b' , query : {} }"; } - virtual bool passOptions() const { + + bool passOptions() const override { return true; } - virtual void addRequiredPrivileges(const std::string& dbname, - const BSONObj& cmdObj, - std::vector<Privilege>* out) { + + void addRequiredPrivileges(const std::string& dbname, + const BSONObj& cmdObj, + std::vector<Privilege>* out) override { ActionSet actions; actions.addAction(ActionType::find); out->push_back(Privilege(parseResourcePattern(dbname, cmdObj), actions)); } - virtual bool supportsWriteConcern(const BSONObj& cmd) const override { + + bool supportsWriteConcern(const BSONObj& cmd) const override { return false; } - bool run(OperationContext* txn, + bool run(OperationContext* opCtx, const string& dbName, BSONObj& cmdObj, int options, string& errmsg, - BSONObjBuilder& result) { - const string fullns = parseNs(dbName, cmdObj); - - auto status = Grid::get(txn)->catalogCache()->getDatabase(txn, dbName); - if (!status.isOK()) { - return appendEmptyResultSet(result, status.getStatus(), fullns); - } - - shared_ptr<DBConfig> conf = status.getValue(); - if (!conf->isSharded(fullns)) { + BSONObjBuilder& result) override { + const NamespaceString nss(parseNsCollectionRequired(dbName, cmdObj)); - if 
(passthrough(txn, conf.get(), cmdObj, options, result)) { + auto routingInfo = + uassertStatusOK(Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfo(opCtx, nss)); + if (!routingInfo.cm()) { + if (passthrough(opCtx, dbName, routingInfo.primaryId(), cmdObj, options, result)) { return true; } @@ -1124,7 +1139,7 @@ public: result.resetToEmpty(); auto parsedDistinct = ParsedDistinct::parse( - txn, resolvedView.getNamespace(), cmdObj, ExtensionsCallbackNoop(), false); + opCtx, resolvedView.getNamespace(), cmdObj, ExtensionsCallbackNoop(), false); if (!parsedDistinct.isOK()) { return appendCommandStatus(result, parsedDistinct.getStatus()); } @@ -1141,7 +1156,7 @@ public: BSONObjBuilder aggResult; Command::findCommand("aggregate") - ->run(txn, dbName, aggCmd.getValue(), options, errmsg, aggResult); + ->run(opCtx, dbName, aggCmd.getValue(), options, errmsg, aggResult); ViewResponseFormatter formatter(aggResult.obj()); auto formatStatus = formatter.appendAsDistinctResponse(&result); @@ -1154,28 +1169,27 @@ public: return false; } - shared_ptr<ChunkManager> cm = conf->getChunkManager(txn, fullns); - massert(10420, "how could chunk manager be null!", cm); + const auto cm = routingInfo.cm(); - BSONObj query = getQuery(cmdObj); + auto query = getQuery(cmdObj); auto queryCollation = getCollation(cmdObj); if (!queryCollation.isOK()) { - return appendEmptyResultSet(result, queryCollation.getStatus(), fullns); + return appendEmptyResultSet(result, queryCollation.getStatus(), nss.ns()); } // Construct collator for deduping. 
std::unique_ptr<CollatorInterface> collator; if (!queryCollation.getValue().isEmpty()) { - auto statusWithCollator = CollatorFactoryInterface::get(txn->getServiceContext()) + auto statusWithCollator = CollatorFactoryInterface::get(opCtx->getServiceContext()) ->makeFromBSON(queryCollation.getValue()); if (!statusWithCollator.isOK()) { - return appendEmptyResultSet(result, statusWithCollator.getStatus(), fullns); + return appendEmptyResultSet(result, statusWithCollator.getStatus(), nss.ns()); } collator = std::move(statusWithCollator.getValue()); } set<ShardId> shardIds; - cm->getShardIdsForQuery(txn, query, queryCollation.getValue(), &shardIds); + cm->getShardIdsForQuery(opCtx, query, queryCollation.getValue(), &shardIds); BSONObjComparator bsonCmp(BSONObj(), BSONObjComparator::FieldNamesMode::kConsider, @@ -1184,15 +1198,15 @@ public: BSONObjSet all = bsonCmp.makeBSONObjSet(); for (const ShardId& shardId : shardIds) { - const auto shardStatus = Grid::get(txn)->shardRegistry()->getShard(txn, shardId); + const auto shardStatus = Grid::get(opCtx)->shardRegistry()->getShard(opCtx, shardId); if (!shardStatus.isOK()) { invariant(shardStatus.getStatus() == ErrorCodes::ShardNotFound); continue; } - ShardConnection conn(shardStatus.getValue()->getConnString(), fullns); + ShardConnection conn(shardStatus.getValue()->getConnString(), nss.ns()); BSONObj res; - bool ok = conn->runCommand(conf->name(), cmdObj, res, options); + bool ok = conn->runCommand(nss.db().toString(), cmdObj, res, options); conn.done(); if (!ok) { @@ -1219,7 +1233,7 @@ public: return true; } - Status explain(OperationContext* txn, + Status explain(OperationContext* opCtx, const std::string& dbname, const BSONObj& cmdObj, ExplainCommon::Verbosity verbosity, @@ -1258,7 +1272,7 @@ public: Timer timer; vector<Strategy::CommandResult> shardResults; - Strategy::commandOp(txn, + Strategy::commandOp(opCtx, dbname, explainCmdBob.obj(), options, @@ -1273,7 +1287,7 @@ public: 
ResolvedView::isResolvedViewErrorResponse(shardResults[0].result)) { auto resolvedView = ResolvedView::fromBSON(shardResults[0].result); auto parsedDistinct = ParsedDistinct::parse( - txn, resolvedView.getNamespace(), cmdObj, ExtensionsCallbackNoop(), true); + opCtx, resolvedView.getNamespace(), cmdObj, ExtensionsCallbackNoop(), true); if (!parsedDistinct.isOK()) { return parsedDistinct.getStatus(); } @@ -1290,7 +1304,7 @@ public: std::string errMsg; if (Command::findCommand("aggregate") - ->run(txn, dbname, aggCmd.getValue(), 0, errMsg, *out)) { + ->run(opCtx, dbname, aggCmd.getValue(), 0, errMsg, *out)) { return Status::OK(); } @@ -1300,57 +1314,67 @@ public: const char* mongosStageName = ClusterExplain::getStageNameForReadOp(shardResults, cmdObj); return ClusterExplain::buildExplainResult( - txn, shardResults, mongosStageName, millisElapsed, out); + opCtx, shardResults, mongosStageName, millisElapsed, out); } + } disinctCmd; class FileMD5Cmd : public PublicGridCommand { public: FileMD5Cmd() : PublicGridCommand("filemd5") {} - virtual void help(stringstream& help) const { + + void help(stringstream& help) const override { help << " example: { filemd5 : ObjectId(aaaaaaa) , root : \"fs\" }"; } - virtual std::string parseNs(const std::string& dbname, const BSONObj& cmdObj) const { - std::string collectionName = cmdObj.getStringField("root"); + std::string parseNs(const std::string& dbname, const BSONObj& cmdObj) const override { + std::string collectionName; + if (const auto rootElt = cmdObj["root"]) { + uassert(ErrorCodes::InvalidNamespace, + "'root' must be of type String", + rootElt.type() == BSONType::String); + collectionName = rootElt.str(); + } + if (collectionName.empty()) collectionName = "fs"; collectionName += ".chunks"; return NamespaceString(dbname, collectionName).ns(); } - virtual void addRequiredPrivileges(const std::string& dbname, - const BSONObj& cmdObj, - std::vector<Privilege>* out) { + void addRequiredPrivileges(const std::string& dbname, + 
const BSONObj& cmdObj, + std::vector<Privilege>* out) override { out->push_back(Privilege(parseResourcePattern(dbname, cmdObj), ActionType::find)); } - virtual bool supportsWriteConcern(const BSONObj& cmd) const override { + bool supportsWriteConcern(const BSONObj& cmd) const override { return false; } - bool run(OperationContext* txn, + bool run(OperationContext* opCtx, const string& dbName, BSONObj& cmdObj, - int, + int options, string& errmsg, - BSONObjBuilder& result) { - const string fullns = parseNs(dbName, cmdObj); + BSONObjBuilder& result) override { + const NamespaceString nss(parseNs(dbName, cmdObj)); - auto conf = uassertStatusOK(Grid::get(txn)->catalogCache()->getDatabase(txn, dbName)); - if (!conf->isSharded(fullns)) { - return passthrough(txn, conf.get(), cmdObj, result); + auto routingInfo = + uassertStatusOK(Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfo(opCtx, nss)); + if (!routingInfo.cm()) { + return passthrough(opCtx, dbName, routingInfo.primaryId(), cmdObj, result); } - shared_ptr<ChunkManager> cm = conf->getChunkManager(txn, fullns); - massert(13091, "how could chunk manager be null!", cm); + const auto cm = routingInfo.cm(); + if (SimpleBSONObjComparator::kInstance.evaluate(cm->getShardKeyPattern().toBSON() == BSON("files_id" << 1))) { BSONObj finder = BSON("files_id" << cmdObj.firstElement()); vector<Strategy::CommandResult> results; Strategy::commandOp( - txn, dbName, cmdObj, 0, fullns, finder, CollationSpec::kSimpleSpec, &results); + opCtx, dbName, cmdObj, 0, nss.ns(), finder, CollationSpec::kSimpleSpec, &results); verify(results.size() == 1); // querying on shard key so should only talk to one shard BSONObj res = results.begin()->result; @@ -1383,11 +1407,11 @@ public: vector<Strategy::CommandResult> results; try { - Strategy::commandOp(txn, + Strategy::commandOp(opCtx, dbName, shardCmd, 0, - fullns, + nss.ns(), finder, CollationSpec::kSimpleSpec, &results); @@ -1417,13 +1441,15 @@ public: errmsg = string("sharded filemd5 
failed because: ") + res["errmsg"].valuestrsafe(); + return false; } - uassert(16246, - "Shard " + conf->name() + - " is too old to support GridFS sharded by {files_id:1, n:1}", - res.hasField("md5state")); + uassert( + 16246, + str::stream() << "Shard for database " << nss.db() + << " is too old to support GridFS sharded by {files_id:1, n:1}", + res.hasField("md5state")); lastResult = res; int nNext = res["numChunks"].numberInt(); @@ -1448,51 +1474,56 @@ public: "n:1}"; return false; } + } fileMD5Cmd; class Geo2dFindNearCmd : public PublicGridCommand { public: Geo2dFindNearCmd() : PublicGridCommand("geoNear") {} - void help(stringstream& h) const { + + void help(stringstream& h) const override { h << "http://dochub.mongodb.org/core/geo#GeospatialIndexing-geoNearCommand"; } - virtual bool passOptions() const { + + bool passOptions() const override { return true; } - virtual void addRequiredPrivileges(const std::string& dbname, - const BSONObj& cmdObj, - std::vector<Privilege>* out) { + + void addRequiredPrivileges(const std::string& dbname, + const BSONObj& cmdObj, + std::vector<Privilege>* out) override { ActionSet actions; actions.addAction(ActionType::find); out->push_back(Privilege(parseResourcePattern(dbname, cmdObj), actions)); } - virtual bool supportsWriteConcern(const BSONObj& cmd) const override { + + bool supportsWriteConcern(const BSONObj& cmd) const override { return false; } - bool run(OperationContext* txn, + bool run(OperationContext* opCtx, const string& dbName, BSONObj& cmdObj, int options, string& errmsg, - BSONObjBuilder& result) { - const string fullns = parseNs(dbName, cmdObj); + BSONObjBuilder& result) override { + const NamespaceString nss(parseNsCollectionRequired(dbName, cmdObj)); - auto conf = uassertStatusOK(Grid::get(txn)->catalogCache()->getDatabase(txn, dbName)); - if (!conf->isSharded(fullns)) { - return passthrough(txn, conf.get(), cmdObj, options, result); + auto routingInfo = + 
uassertStatusOK(Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfo(opCtx, nss)); + if (!routingInfo.cm()) { + return passthrough(opCtx, dbName, routingInfo.primaryId(), cmdObj, result); } - shared_ptr<ChunkManager> cm = conf->getChunkManager(txn, fullns); - massert(13500, "how could chunk manager be null!", cm); + const auto cm = routingInfo.cm(); BSONObj query = getQuery(cmdObj); auto collation = getCollation(cmdObj); if (!collation.isOK()) { - return appendEmptyResultSet(result, collation.getStatus(), fullns); + return appendEmptyResultSet(result, collation.getStatus(), nss.ns()); } set<ShardId> shardIds; - cm->getShardIdsForQuery(txn, query, collation.getValue(), &shardIds); + cm->getShardIdsForQuery(opCtx, query, collation.getValue(), &shardIds); // We support both "num" and "limit" options to control limit int limit = 100; @@ -1503,7 +1534,7 @@ public: list<shared_ptr<Future::CommandResult>> futures; BSONArrayBuilder shardArray; for (const ShardId& shardId : shardIds) { - const auto shardStatus = Grid::get(txn)->shardRegistry()->getShard(txn, shardId); + const auto shardStatus = Grid::get(opCtx)->shardRegistry()->getShard(opCtx, shardId); if (!shardStatus.isOK()) { invariant(shardStatus.getStatus() == ErrorCodes::ShardNotFound); continue; @@ -1524,7 +1555,7 @@ public: i != futures.end(); i++) { shared_ptr<Future::CommandResult> res = *i; - if (!res->join(txn)) { + if (!res->join(opCtx)) { errmsg = res->result()["errmsg"].String(); if (res->result().hasField("code")) { result.append(res->result()["code"]); @@ -1551,7 +1582,7 @@ public: // TODO: maybe shrink results if size() > limit } - result.append("ns", fullns); + result.append("ns", nss.ns()); result.append("near", nearStr); int outCount = 0; @@ -1584,64 +1615,41 @@ public: return true; } -} geo2dFindNearCmd; -class CompactCmd : public PublicGridCommand { -public: - CompactCmd() : PublicGridCommand("compact") {} - virtual void addRequiredPrivileges(const std::string& dbname, - const BSONObj& cmdObj, 
- std::vector<Privilege>* out) { - ActionSet actions; - actions.addAction(ActionType::compact); - out->push_back(Privilege(parseResourcePattern(dbname, cmdObj), actions)); - } - virtual bool supportsWriteConcern(const BSONObj& cmd) const override { - return false; - } - virtual bool run(OperationContext* txn, - const string& dbName, - BSONObj& cmdObj, - int, - string& errmsg, - BSONObjBuilder& result) { - errmsg = "compact not allowed through mongos"; - return false; - } -} compactCmd; +} geo2dFindNearCmd; class EvalCmd : public PublicGridCommand { public: EvalCmd() : PublicGridCommand("eval", "$eval") {} - virtual void addRequiredPrivileges(const std::string& dbname, - const BSONObj& cmdObj, - std::vector<Privilege>* out) { + + void addRequiredPrivileges(const std::string& dbname, + const BSONObj& cmdObj, + std::vector<Privilege>* out) override { // $eval can do pretty much anything, so require all privileges. RoleGraph::generateUniversalPrivileges(out); } - virtual bool supportsWriteConcern(const BSONObj& cmd) const override { + + bool supportsWriteConcern(const BSONObj& cmd) const override { return false; } - virtual bool run(OperationContext* txn, - const string& dbName, - BSONObj& cmdObj, - int, - string& errmsg, - BSONObjBuilder& result) { + + bool run(OperationContext* opCtx, + const string& dbName, + BSONObj& cmdObj, + int options, + string& errmsg, + BSONObjBuilder& result) override { RARELY { warning() << "the eval command is deprecated" << startupWarningsLog; } - // $eval isn't allowed to access sharded collections, but we need to leave the - // shard to detect that. 
- auto status = Grid::get(txn)->catalogCache()->getDatabase(txn, dbName); - if (!status.isOK()) { - return appendCommandStatus(result, status.getStatus()); - } - - shared_ptr<DBConfig> conf = status.getValue(); - return passthrough(txn, conf.get(), cmdObj, result); + // $eval isn't allowed to access sharded collections, but we need to leave the shard to + // detect that + const auto dbInfo = + uassertStatusOK(Grid::get(opCtx)->catalogCache()->getDatabase(opCtx, dbName)); + return passthrough(opCtx, dbName, dbInfo.primaryId(), cmdObj, result); } + } evalCmd; class CmdListCollections final : public PublicGridCommand { @@ -1667,11 +1675,11 @@ public: str::stream() << "Not authorized to create users on db: " << dbname); } - virtual bool supportsWriteConcern(const BSONObj& cmd) const override { + bool supportsWriteConcern(const BSONObj& cmd) const override { return false; } - bool run(OperationContext* txn, + bool run(OperationContext* opCtx, const string& dbName, BSONObj& cmdObj, int options, @@ -1679,18 +1687,23 @@ public: BSONObjBuilder& result) final { auto nss = NamespaceString::makeListCollectionsNSS(dbName); - auto conf = Grid::get(txn)->catalogCache()->getDatabase(txn, dbName); - if (!conf.isOK()) { - return appendEmptyResultSet(result, conf.getStatus(), dbName + ".$cmd.listCollections"); + auto dbInfoStatus = Grid::get(opCtx)->catalogCache()->getDatabase(opCtx, dbName); + if (!dbInfoStatus.isOK()) { + return appendEmptyResultSet(result, dbInfoStatus.getStatus(), nss.ns()); } - return cursorCommandPassthrough(txn, conf.getValue(), cmdObj, nss, options, &result); + const auto& dbInfo = dbInfoStatus.getValue(); + + return cursorCommandPassthrough( + opCtx, dbName, dbInfo.primaryId(), cmdObj, nss, options, &result); } + } cmdListCollections; class CmdListIndexes final : public PublicGridCommand { public: CmdListIndexes() : PublicGridCommand("listIndexes") {} + virtual Status checkAuthForCommand(Client* client, const std::string& dbname, const BSONObj& cmdObj) { @@ 
-1712,27 +1725,25 @@ public: << ns.coll()); } - virtual bool supportsWriteConcern(const BSONObj& cmd) const override { + bool supportsWriteConcern(const BSONObj& cmd) const override { return false; } - bool run(OperationContext* txn, + bool run(OperationContext* opCtx, const string& dbName, BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result) final { - auto conf = Grid::get(txn)->catalogCache()->getDatabase(txn, dbName); - if (!conf.isOK()) { - return appendCommandStatus(result, conf.getStatus()); - } + const NamespaceString nss(parseNsCollectionRequired(dbName, cmdObj)); + + const auto routingInfo = + uassertStatusOK(Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfo(opCtx, nss)); - const NamespaceString targetNss(parseNsCollectionRequired(dbName, cmdObj)); - const NamespaceString commandNss = - NamespaceString::makeListIndexesNSS(targetNss.db(), targetNss.coll()); - dassert(targetNss == commandNss.getTargetNSForListIndexes()); + const auto commandNss = NamespaceString::makeListIndexesNSS(nss.db(), nss.coll()); - return cursorCommandPassthrough(txn, conf.getValue(), cmdObj, commandNss, options, &result); + return cursorCommandPassthrough( + opCtx, nss.db(), routingInfo.primaryId(), cmdObj, commandNss, options, &result); } } cmdListIndexes; diff --git a/src/mongo/s/commands/run_on_all_shards_cmd.cpp b/src/mongo/s/commands/run_on_all_shards_cmd.cpp index 9b0b26cf14b..881b7d654ab 100644 --- a/src/mongo/s/commands/run_on_all_shards_cmd.cpp +++ b/src/mongo/s/commands/run_on_all_shards_cmd.cpp @@ -36,12 +36,12 @@ #include <set> #include "mongo/db/jsobj.h" +#include "mongo/s/catalog_cache.h" #include "mongo/s/client/shard.h" #include "mongo/s/client/shard_registry.h" #include "mongo/s/commands/cluster_commands_common.h" #include "mongo/s/commands/sharded_command_processing.h" #include "mongo/s/grid.h" -#include "mongo/s/sharding_raii.h" #include "mongo/util/log.h" namespace mongo { @@ -64,14 +64,14 @@ BSONObj 
RunOnAllShardsCommand::specialErrorHandler(const std::string& server, return originalResult; } -void RunOnAllShardsCommand::getShardIds(OperationContext* txn, +void RunOnAllShardsCommand::getShardIds(OperationContext* opCtx, const std::string& db, BSONObj& cmdObj, std::vector<ShardId>& shardIds) { grid.shardRegistry()->getAllShardIds(&shardIds); } -bool RunOnAllShardsCommand::run(OperationContext* txn, +bool RunOnAllShardsCommand::run(OperationContext* opCtx, const std::string& dbName, BSONObj& cmdObj, int options, @@ -80,15 +80,15 @@ bool RunOnAllShardsCommand::run(OperationContext* txn, LOG(1) << "RunOnAllShardsCommand db: " << dbName << " cmd:" << redact(cmdObj); if (_implicitCreateDb) { - uassertStatusOK(ScopedShardDatabase::getOrCreate(txn, dbName)); + uassertStatusOK(createShardDatabase(opCtx, dbName)); } std::vector<ShardId> shardIds; - getShardIds(txn, dbName, cmdObj, shardIds); + getShardIds(opCtx, dbName, cmdObj, shardIds); std::list<std::shared_ptr<Future::CommandResult>> futures; for (const ShardId& shardId : shardIds) { - const auto shardStatus = grid.shardRegistry()->getShard(txn, shardId); + const auto shardStatus = grid.shardRegistry()->getShard(opCtx, shardId); if (!shardStatus.isOK()) { continue; } @@ -120,7 +120,7 @@ bool RunOnAllShardsCommand::run(OperationContext* txn, ++futuresit, ++shardIdsIt) { std::shared_ptr<Future::CommandResult> res = *futuresit; - if (res->join(txn)) { + if (res->join(opCtx)) { // success :) BSONObj result = res->result(); results.emplace_back(shardIdsIt->toString(), result); diff --git a/src/mongo/s/commands/strategy.cpp b/src/mongo/s/commands/strategy.cpp index 1ed18cbc1ec..998e401682b 100644 --- a/src/mongo/s/commands/strategy.cpp +++ b/src/mongo/s/commands/strategy.cpp @@ -52,8 +52,6 @@ #include "mongo/rpc/get_status_from_command_result.h" #include "mongo/rpc/metadata/server_selection_metadata.h" #include "mongo/s/catalog_cache.h" -#include "mongo/s/chunk_manager.h" -#include "mongo/s/chunk_version.h" #include 
"mongo/s/client/parallel.h" #include "mongo/s/client/shard_connection.h" #include "mongo/s/client/shard_registry.h" @@ -79,7 +77,7 @@ using std::stringstream; namespace { -void runAgainstRegistered(OperationContext* txn, +void runAgainstRegistered(OperationContext* opCtx, const char* ns, BSONObj& jsobj, BSONObjBuilder& anObjBuilder, @@ -101,17 +99,17 @@ void runAgainstRegistered(OperationContext* txn, return; } - Command::execCommandClient(txn, c, queryOptions, ns, jsobj, anObjBuilder); + Command::execCommandClient(opCtx, c, queryOptions, ns, jsobj, anObjBuilder); } } // namespace -void Strategy::queryOp(OperationContext* txn, const NamespaceString& nss, DbMessage* dbm) { +void Strategy::queryOp(OperationContext* opCtx, const NamespaceString& nss, DbMessage* dbm) { globalOpCounters.gotQuery(); const QueryMessage q(*dbm); - Client* const client = txn->getClient(); + Client* const client = opCtx->getClient(); AuthorizationSession* const authSession = AuthorizationSession::get(client); Status status = authSession->checkAuthForFind(nss, false); @@ -146,7 +144,7 @@ void Strategy::queryOp(OperationContext* txn, const NamespaceString& nss, DbMess }(); auto canonicalQuery = - uassertStatusOK(CanonicalQuery::canonicalize(txn, q, ExtensionsCallbackNoop())); + uassertStatusOK(CanonicalQuery::canonicalize(opCtx, q, ExtensionsCallbackNoop())); // If the $explain flag was set, we must run the operation on the shards as an explain command // rather than a find command. 
@@ -162,7 +160,7 @@ void Strategy::queryOp(OperationContext* txn, const NamespaceString& nss, DbMess BSONObjBuilder explainBuilder; uassertStatusOK(Strategy::explainFind( - txn, findCommand, queryRequest, verbosity, metadata, &explainBuilder)); + opCtx, findCommand, queryRequest, verbosity, metadata, &explainBuilder)); BSONObj explainObj = explainBuilder.done(); replyToQuery(0, // query result flags @@ -183,7 +181,7 @@ void Strategy::queryOp(OperationContext* txn, const NamespaceString& nss, DbMess // 0 means the cursor is exhausted. Otherwise we assume that a cursor with the returned id can // be retrieved via the ClusterCursorManager. auto cursorId = - ClusterFind::runQuery(txn, + ClusterFind::runQuery(opCtx, *canonicalQuery, readPreference, &batch, @@ -212,10 +210,12 @@ void Strategy::queryOp(OperationContext* txn, const NamespaceString& nss, DbMess cursorId.getValue()); } -void Strategy::clientCommandOp(OperationContext* txn, const NamespaceString& nss, DbMessage* dbm) { +void Strategy::clientCommandOp(OperationContext* opCtx, + const NamespaceString& nss, + DbMessage* dbm) { const QueryMessage q(*dbm); - Client* const client = txn->getClient(); + Client* const client = opCtx->getClient(); LOG(3) << "command: " << q.ns << " " << redact(q.query) << " ntoreturn: " << q.ntoreturn << " options: " << q.queryOptions; @@ -245,7 +245,7 @@ void Strategy::clientCommandOp(OperationContext* txn, const NamespaceString& nss const NamespaceString interposedNss("admin", "$cmd"); BSONObjBuilder reply; runAgainstRegistered( - txn, interposedNss.ns().c_str(), interposedCmd, reply, q.queryOptions); + opCtx, interposedNss.ns().c_str(), interposedCmd, reply, q.queryOptions); replyToQuery(0, client->session(), dbm->msg(), reply.done()); }; @@ -299,7 +299,7 @@ void Strategy::clientCommandOp(OperationContext* txn, const NamespaceString& nss const int maxTimeMS = uassertStatusOK(QueryRequest::parseMaxTimeMS(cmdObj[QueryRequest::cmdOptionMaxTimeMS])); if (maxTimeMS > 0) { - 
txn->setDeadlineAfterNowBy(Milliseconds{maxTimeMS}); + opCtx->setDeadlineAfterNowBy(Milliseconds{maxTimeMS}); } int loops = 5; @@ -309,7 +309,7 @@ void Strategy::clientCommandOp(OperationContext* txn, const NamespaceString& nss OpQueryReplyBuilder reply; { BSONObjBuilder builder(reply.bufBuilderForResults()); - runAgainstRegistered(txn, q.ns, cmdObj, builder, q.queryOptions); + runAgainstRegistered(opCtx, q.ns, cmdObj, builder, q.queryOptions); } reply.sendCommandReply(client->session(), dbm->msg()); return; @@ -324,13 +324,12 @@ void Strategy::clientCommandOp(OperationContext* txn, const NamespaceString& nss // For legacy reasons, ns may not actually be set in the exception const std::string staleNS(e.getns().empty() ? std::string(q.ns) : e.getns()); - ShardConnection::checkMyConnectionVersions(txn, staleNS); + ShardConnection::checkMyConnectionVersions(opCtx, staleNS); if (loops < 4) { - // This throws out the entire database cache entry in response to - // StaleConfigException instead of just the collection which encountered it. There - // is no good reason for it other than the lack of lower-granularity cache - // invalidation. 
- Grid::get(txn)->catalogCache()->invalidate(NamespaceString(staleNS).db()); + const NamespaceString nss(staleNS); + if (nss.isValid()) { + Grid::get(opCtx)->catalogCache()->invalidateShardedCollection(nss); + } } } catch (const DBException& e) { OpQueryReplyBuilder reply; @@ -344,7 +343,7 @@ void Strategy::clientCommandOp(OperationContext* txn, const NamespaceString& nss } } -void Strategy::commandOp(OperationContext* txn, +void Strategy::commandOp(OperationContext* opCtx, const string& db, const BSONObj& command, int options, @@ -358,7 +357,7 @@ void Strategy::commandOp(OperationContext* txn, qSpec, CommandInfo(versionedNS, targetingQuery, targetingCollation)); // Initialize the cursor - cursor.init(txn); + cursor.init(opCtx); set<ShardId> shardIds; cursor.getQueryShardIds(shardIds); @@ -374,7 +373,7 @@ void Strategy::commandOp(OperationContext* txn, } } -void Strategy::getMore(OperationContext* txn, const NamespaceString& nss, DbMessage* dbm) { +void Strategy::getMore(OperationContext* opCtx, const NamespaceString& nss, DbMessage* dbm) { const int ntoreturn = dbm->pullInt(); uassert( 34424, str::stream() << "Invalid ntoreturn for OP_GET_MORE: " << ntoreturn, ntoreturn >= 0); @@ -382,12 +381,12 @@ void Strategy::getMore(OperationContext* txn, const NamespaceString& nss, DbMess globalOpCounters.gotGetMore(); - Client* const client = txn->getClient(); + Client* const client = opCtx->getClient(); // TODO: Handle stale config exceptions here from coll being dropped or sharded during op for // now has same semantics as legacy request. 
- auto statusGetDb = Grid::get(txn)->catalogCache()->getDatabase(txn, nss.db()); + auto statusGetDb = Grid::get(opCtx)->catalogCache()->getDatabase(opCtx, nss.db()); if (statusGetDb == ErrorCodes::NamespaceNotFound) { replyToQuery(ResultFlag_CursorNotFound, client->session(), dbm->msg(), 0, 0, 0); return; @@ -401,7 +400,7 @@ void Strategy::getMore(OperationContext* txn, const NamespaceString& nss, DbMess GetMoreRequest getMoreRequest(nss, cursorId, batchSize, boost::none, boost::none, boost::none); - auto cursorResponse = ClusterFind::runGetMore(txn, getMoreRequest); + auto cursorResponse = ClusterFind::runGetMore(opCtx, getMoreRequest); if (cursorResponse == ErrorCodes::CursorNotFound) { replyToQuery(ResultFlag_CursorNotFound, client->session(), dbm->msg(), 0, 0, 0); return; @@ -427,7 +426,7 @@ void Strategy::getMore(OperationContext* txn, const NamespaceString& nss, DbMess cursorResponse.getValue().getCursorId()); } -void Strategy::killCursors(OperationContext* txn, DbMessage* dbm) { +void Strategy::killCursors(OperationContext* opCtx, DbMessage* dbm) { const int numCursors = dbm->pullInt(); massert(34425, str::stream() << "Invalid killCursors message. 
numCursors: " << numCursors @@ -444,9 +443,9 @@ void Strategy::killCursors(OperationContext* txn, DbMessage* dbm) { ConstDataCursor cursors(dbm->getArray(numCursors)); - Client* const client = txn->getClient(); + Client* const client = opCtx->getClient(); AuthorizationSession* const authSession = AuthorizationSession::get(client); - ClusterCursorManager* const manager = Grid::get(txn)->getCursorManager(); + ClusterCursorManager* const manager = Grid::get(opCtx)->getCursorManager(); for (int i = 0; i < numCursors; ++i) { const CursorId cursorId = cursors.readAndAdvance<LittleEndian<int64_t>>(); @@ -480,13 +479,13 @@ void Strategy::killCursors(OperationContext* txn, DbMessage* dbm) { } } -void Strategy::writeOp(OperationContext* txn, DbMessage* dbm) { +void Strategy::writeOp(OperationContext* opCtx, DbMessage* dbm) { OwnedPointerVector<BatchedCommandRequest> commandRequestsOwned; std::vector<BatchedCommandRequest*>& commandRequests = commandRequestsOwned.mutableVector(); msgToBatchRequests(dbm->msg(), &commandRequests); - auto& clientLastError = LastError::get(txn->getClient()); + auto& clientLastError = LastError::get(opCtx->getClient()); for (auto it = commandRequests.begin(); it != commandRequests.end(); ++it) { // Multiple commands registered to last error as multiple requests @@ -509,7 +508,7 @@ void Strategy::writeOp(OperationContext* txn, DbMessage* dbm) { BSONObj commandBSON = commandRequest->toBSON(); BSONObjBuilder builder; - runAgainstRegistered(txn, cmdNS.c_str(), commandBSON, builder, 0); + runAgainstRegistered(opCtx, cmdNS.c_str(), commandBSON, builder, 0); bool parsed = commandResponse.parseBSON(builder.done(), nullptr); (void)parsed; // for compile @@ -529,7 +528,7 @@ void Strategy::writeOp(OperationContext* txn, DbMessage* dbm) { } } -Status Strategy::explainFind(OperationContext* txn, +Status Strategy::explainFind(OperationContext* opCtx, const BSONObj& findCommand, const QueryRequest& qr, ExplainCommon::Verbosity verbosity, @@ -544,7 +543,7 @@ 
Status Strategy::explainFind(OperationContext* txn, Timer timer; std::vector<Strategy::CommandResult> shardResults; - Strategy::commandOp(txn, + Strategy::commandOp(opCtx, qr.nss().db().toString(), explainCmdBob.obj(), options, @@ -564,7 +563,7 @@ Status Strategy::explainFind(OperationContext* txn, const char* mongosStageName = ClusterExplain::getStageNameForReadOp(shardResults, findCommand); return ClusterExplain::buildExplainResult( - txn, shardResults, mongosStageName, millisElapsed, out); + opCtx, shardResults, mongosStageName, millisElapsed, out); } } // namespace mongo diff --git a/src/mongo/s/config.cpp b/src/mongo/s/config.cpp deleted file mode 100644 index c16f671ba78..00000000000 --- a/src/mongo/s/config.cpp +++ /dev/null @@ -1,366 +0,0 @@ -/** - * Copyright (C) 2008-2015 MongoDB Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the GNU Affero General Public License in all respects for - * all of the code used other than as permitted herein. 
If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding - -#include "mongo/platform/basic.h" - -#include "mongo/s/config.h" - -#include <vector> - -#include "mongo/db/lasterror.h" -#include "mongo/db/operation_context.h" -#include "mongo/db/query/collation/collator_factory_interface.h" -#include "mongo/s/catalog/sharding_catalog_client.h" -#include "mongo/s/catalog/type_chunk.h" -#include "mongo/s/catalog/type_collection.h" -#include "mongo/s/catalog/type_database.h" -#include "mongo/s/catalog_cache.h" -#include "mongo/s/chunk_manager.h" -#include "mongo/s/chunk_version.h" -#include "mongo/s/grid.h" -#include "mongo/stdx/memory.h" -#include "mongo/util/log.h" - -namespace mongo { - -struct CollectionInfo { - // The config server opTime at which the chunk manager below was loaded - const repl::OpTime configOpTime; - - // The chunk manager - const std::shared_ptr<ChunkManager> cm; -}; - -DBConfig::DBConfig(const DatabaseType& dbt, repl::OpTime configOpTime) - : _name(dbt.getName()), - _shardingEnabled(dbt.getSharded()), - _primaryId(dbt.getPrimary()), - _configOpTime(std::move(configOpTime)) {} - -DBConfig::~DBConfig() = default; - -bool DBConfig::isSharded(const std::string& ns) { - stdx::lock_guard<stdx::mutex> lk(_lock); - - return _collections.count(ns) > 0; -} - -void DBConfig::markNSNotSharded(const std::string& ns) { - stdx::lock_guard<stdx::mutex> lk(_lock); - - CollectionInfoMap::iterator it = _collections.find(ns); - if (it != _collections.end()) { - _collections.erase(it); - } -} - -std::shared_ptr<ChunkManager> DBConfig::getChunkManagerIfExists(OperationContext* txn, - const 
std::string& ns, - bool shouldReload, - bool forceReload) { - // Don't report exceptions here as errors in GetLastError - LastError::Disabled ignoreForGLE(&LastError::get(cc())); - - try { - return getChunkManager(txn, ns, shouldReload, forceReload); - } catch (const DBException&) { - return nullptr; - } -} - -std::shared_ptr<ChunkManager> DBConfig::getChunkManager(OperationContext* txn, - const std::string& ns, - bool shouldReload, - bool forceReload) { - ChunkVersion oldVersion; - std::shared_ptr<ChunkManager> oldManager; - - { - stdx::lock_guard<stdx::mutex> lk(_lock); - - auto it = _collections.find(ns); - - const bool earlyReload = (it == _collections.end()) && (shouldReload || forceReload); - if (earlyReload) { - // This is to catch cases where there this is a new sharded collection. - // Note: read the _reloadCount inside the _lock mutex, so _loadIfNeeded will always - // be forced to perform a reload. - const auto currentReloadIteration = _reloadCount.load(); - _loadIfNeeded(txn, currentReloadIteration); - - it = _collections.find(ns); - } - - uassert(ErrorCodes::NamespaceNotSharded, - str::stream() << "Collection is not sharded: " << ns, - it != _collections.end()); - - const auto& ci = it->second; - - if (!(shouldReload || forceReload) || earlyReload) { - return ci.cm; - } - - if (ci.cm) { - oldManager = ci.cm; - oldVersion = ci.cm->getVersion(); - } - } - - // TODO: We need to keep this first one-chunk check in until we have a more efficient way of - // creating/reusing a chunk manager, as doing so requires copying the full set of chunks - // currently - std::vector<ChunkType> newestChunk; - if (oldVersion.isSet() && !forceReload) { - uassertStatusOK(Grid::get(txn)->catalogClient(txn)->getChunks( - txn, - BSON(ChunkType::ns(ns)), - BSON(ChunkType::DEPRECATED_lastmod() << -1), - 1, - &newestChunk, - nullptr, - repl::ReadConcernLevel::kMajorityReadConcern)); - - if (!newestChunk.empty()) { - invariant(newestChunk.size() == 1); - ChunkVersion v = 
newestChunk[0].getVersion(); - if (v.equals(oldVersion)) { - stdx::lock_guard<stdx::mutex> lk(_lock); - - auto it = _collections.find(ns); - uassert(15885, - str::stream() << "not sharded after reloading from chunks : " << ns, - it != _collections.end()); - - const auto& ci = it->second; - return ci.cm; - } - } - } else if (!oldVersion.isSet()) { - warning() << "version 0 found when " << (forceReload ? "reloading" : "checking") - << " chunk manager; collection '" << ns << "' initially detected as sharded"; - } - - std::unique_ptr<ChunkManager> tempChunkManager; - - { - stdx::lock_guard<stdx::mutex> lll(_hitConfigServerLock); - - if (!newestChunk.empty() && !forceReload) { - // If we have a target we're going for see if we've hit already - stdx::lock_guard<stdx::mutex> lk(_lock); - - auto it = _collections.find(ns); - - if (it != _collections.end()) { - const auto& ci = it->second; - - ChunkVersion currentVersion = newestChunk[0].getVersion(); - - // Only reload if the version we found is newer than our own in the same epoch - if (currentVersion <= ci.cm->getVersion() && - ci.cm->getVersion().hasEqualEpoch(currentVersion)) { - return ci.cm; - } - } - } - - // Reload the chunk manager outside of the DBConfig's mutex so as to not block operations - // for different collections on the same database - tempChunkManager.reset(new ChunkManager( - NamespaceString(oldManager->getns()), - oldManager->getVersion().epoch(), - oldManager->getShardKeyPattern(), - oldManager->getDefaultCollator() ? 
oldManager->getDefaultCollator()->clone() : nullptr, - oldManager->isUnique())); - tempChunkManager->loadExistingRanges(txn, oldManager.get()); - - if (!tempChunkManager->numChunks()) { - // Maybe we're not sharded any more, so do a full reload - const auto currentReloadIteration = _reloadCount.load(); - - const bool successful = [&]() { - stdx::lock_guard<stdx::mutex> lk(_lock); - return _loadIfNeeded(txn, currentReloadIteration); - }(); - - // If we aren't successful loading the database entry, we don't want to keep the stale - // object around which has invalid data. - if (!successful) { - Grid::get(txn)->catalogCache()->invalidate(_name); - } - - return getChunkManager(txn, ns); - } - } - - stdx::lock_guard<stdx::mutex> lk(_lock); - - auto it = _collections.find(ns); - uassert(14822, - str::stream() << "Collection " << ns << " became unsharded in the middle.", - it != _collections.end()); - - const auto& ci = it->second; - - // Reset if our versions aren't the same - bool shouldReset = !tempChunkManager->getVersion().equals(ci.cm->getVersion()); - - // Also reset if we're forced to do so - if (!shouldReset && forceReload) { - shouldReset = true; - warning() << "chunk manager reload forced for collection '" << ns << "', config version is " - << tempChunkManager->getVersion(); - } - - // - // LEGACY BEHAVIOR - // - // It's possible to get into a state when dropping collections when our new version is - // less than our prev version. Behave identically to legacy mongos, for now, and warn to - // draw attention to the problem. 
- // - // TODO: Assert in next version, to allow smooth upgrades - // - - if (shouldReset && tempChunkManager->getVersion() < ci.cm->getVersion()) { - shouldReset = false; - - warning() << "not resetting chunk manager for collection '" << ns << "', config version is " - << tempChunkManager->getVersion() << " and " - << "old version is " << ci.cm->getVersion(); - } - - // end legacy behavior - - if (shouldReset) { - const auto cmOpTime = tempChunkManager->getConfigOpTime(); - - // The existing ChunkManager could have been updated since we last checked, so replace the - // existing chunk manager only if it is strictly newer. - if (cmOpTime > ci.cm->getConfigOpTime()) { - _collections.erase(ns); - auto emplacedEntryIt = - _collections.emplace(ns, CollectionInfo{cmOpTime, std::move(tempChunkManager)}) - .first; - return emplacedEntryIt->second.cm; - } - } - - return ci.cm; -} - -bool DBConfig::load(OperationContext* txn) { - const auto currentReloadIteration = _reloadCount.load(); - stdx::lock_guard<stdx::mutex> lk(_lock); - return _loadIfNeeded(txn, currentReloadIteration); -} - -bool DBConfig::_loadIfNeeded(OperationContext* txn, Counter reloadIteration) { - if (reloadIteration != _reloadCount.load()) { - return true; - } - - const auto catalogClient = Grid::get(txn)->catalogClient(txn); - - auto status = catalogClient->getDatabase(txn, _name); - if (status == ErrorCodes::NamespaceNotFound) { - return false; - } - - // All other errors are connectivity, etc so throw an exception. 
- uassertStatusOK(status.getStatus()); - - const auto& dbOpTimePair = status.getValue(); - const auto& dbt = dbOpTimePair.value; - invariant(_name == dbt.getName()); - _primaryId = dbt.getPrimary(); - - invariant(dbOpTimePair.opTime >= _configOpTime); - _configOpTime = dbOpTimePair.opTime; - - // Load all collections - std::vector<CollectionType> collections; - repl::OpTime configOpTimeWhenLoadingColl; - uassertStatusOK( - catalogClient->getCollections(txn, &_name, &collections, &configOpTimeWhenLoadingColl)); - - invariant(configOpTimeWhenLoadingColl >= _configOpTime); - - for (const auto& coll : collections) { - auto collIter = _collections.find(coll.getNs().ns()); - if (collIter != _collections.end()) { - invariant(configOpTimeWhenLoadingColl >= collIter->second.configOpTime); - } - - _collections.erase(coll.getNs().ns()); - - if (!coll.getDropped()) { - std::unique_ptr<CollatorInterface> defaultCollator; - if (!coll.getDefaultCollation().isEmpty()) { - auto statusWithCollator = CollatorFactoryInterface::get(txn->getServiceContext()) - ->makeFromBSON(coll.getDefaultCollation()); - - // The collation was validated upon collection creation. 
- invariantOK(statusWithCollator.getStatus()); - - defaultCollator = std::move(statusWithCollator.getValue()); - } - - std::unique_ptr<ChunkManager> manager( - stdx::make_unique<ChunkManager>(coll.getNs(), - coll.getEpoch(), - ShardKeyPattern(coll.getKeyPattern()), - std::move(defaultCollator), - coll.getUnique())); - - // Do the blocking collection load - manager->loadExistingRanges(txn, nullptr); - - // Collections with no chunks are unsharded, no matter what the collections entry says - if (manager->numChunks()) { - _collections.emplace( - coll.getNs().ns(), - CollectionInfo{configOpTimeWhenLoadingColl, std::move(manager)}); - } - } - } - - _reloadCount.fetchAndAdd(1); - - return true; -} - -ShardId DBConfig::getPrimaryId() { - stdx::lock_guard<stdx::mutex> lk(_lock); - return _primaryId; -} - -} // namespace mongo diff --git a/src/mongo/s/config.h b/src/mongo/s/config.h deleted file mode 100644 index 2b0ecbb7d07..00000000000 --- a/src/mongo/s/config.h +++ /dev/null @@ -1,146 +0,0 @@ -/** - * Copyright (C) 2008-2015 MongoDB Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. 
You - * must comply with the GNU Affero General Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#pragma once - -#include <string> - -#include "mongo/db/repl/optime.h" -#include "mongo/platform/atomic_word.h" -#include "mongo/s/shard_id.h" -#include "mongo/stdx/mutex.h" - -namespace mongo { - -class ChunkManager; -struct CollectionInfo; -class DatabaseType; -class OperationContext; - -/** - * Represents the cache entry for a database. - */ -class DBConfig { -public: - DBConfig(const DatabaseType& dbt, repl::OpTime configOpTime); - ~DBConfig(); - - /** - * The name of the database which this entry caches. - */ - const std::string& name() const { - return _name; - } - - ShardId getPrimaryId(); - - /** - * Returns whether 'enableSharding' has been called for this database. - */ - bool isShardingEnabled() const { - return _shardingEnabled; - } - - /** - * Removes the specified namespace from the set of collections under this database entry so that - * from then onwards it will be treated as unsharded. - * - * Note that this method doesn't do any writes to the config metadata, but simply drops the - * specified namespace from the cache. 
- */ - void markNSNotSharded(const std::string& ns); - - /** - * @return whether or not the 'ns' collection is partitioned - */ - bool isSharded(const std::string& ns); - - std::shared_ptr<ChunkManager> getChunkManager(OperationContext* txn, - const std::string& ns, - bool reload = false, - bool forceReload = false); - std::shared_ptr<ChunkManager> getChunkManagerIfExists(OperationContext* txn, - const std::string& ns, - bool reload = false, - bool forceReload = false); - - /** - * Returns true if it is successful at loading the DBConfig, false if the database is not found, - * and throws on all other errors. - */ - bool load(OperationContext* txn); - -protected: - typedef std::map<std::string, CollectionInfo> CollectionInfoMap; - typedef AtomicUInt64::WordType Counter; - - /** - * Returns true if it is successful at loading the DBConfig, false if the database is not found, - * and throws on all other errors. - * Also returns true without reloading if reloadIteration is not equal to the _reloadCount. - * This is to avoid multiple threads attempting to reload do duplicate work. - */ - bool _loadIfNeeded(OperationContext* txn, Counter reloadIteration); - - // All member variables are labeled with one of the following codes indicating the - // synchronization rules for accessing them. - // - // (L) Must hold _lock for access. - // (S) Self synchronizing, no explicit locking needed. - // - // Mutex lock order: - // _hitConfigServerLock -> _lock - // - - // Name of the database which this entry caches - const std::string _name; - - // Whether sharding is enabled for this database - const bool _shardingEnabled; - - // Primary shard id - ShardId _primaryId; // (L) - - // Set of collections and lock to protect access - stdx::mutex _lock; - CollectionInfoMap _collections; // (L) - - // OpTime of config server when the database definition was loaded. 
- repl::OpTime _configOpTime; // (L) - - // Ensures that only one thread at a time loads collection configuration data from - // the config server - stdx::mutex _hitConfigServerLock; - - // Increments every time this performs a full reload. Since a full reload can take a very - // long time for very large clusters, this can be used to minimize duplicate work when multiple - // threads tries to perform full rerload at roughly the same time. - AtomicUInt64 _reloadCount; // (S) -}; - -} // namespace mongo diff --git a/src/mongo/s/query/cluster_find.cpp b/src/mongo/s/query/cluster_find.cpp index 2f07e8494c7..aa3b0f286e4 100644 --- a/src/mongo/s/query/cluster_find.cpp +++ b/src/mongo/s/query/cluster_find.cpp @@ -52,7 +52,6 @@ #include "mongo/s/query/cluster_client_cursor_impl.h" #include "mongo/s/query/cluster_cursor_manager.h" #include "mongo/s/query/store_possible_cursor.h" -#include "mongo/s/sharding_raii.h" #include "mongo/s/stale_exception.h" #include "mongo/stdx/memory.h" #include "mongo/util/fail_point_service.h" @@ -149,14 +148,14 @@ StatusWith<std::unique_ptr<QueryRequest>> transformQueryForShards(const QueryReq return std::move(newQR); } -StatusWith<CursorId> runQueryWithoutRetrying(OperationContext* txn, +StatusWith<CursorId> runQueryWithoutRetrying(OperationContext* opCtx, const CanonicalQuery& query, const ReadPreferenceSetting& readPref, ChunkManager* chunkManager, std::shared_ptr<Shard> primary, std::vector<BSONObj>* results, BSONObj* viewDefinition) { - auto shardRegistry = Grid::get(txn)->shardRegistry(); + auto shardRegistry = Grid::get(opCtx)->shardRegistry(); // Get the set of shards on which we will run the query. 
std::vector<std::shared_ptr<Shard>> shards; @@ -166,13 +165,13 @@ StatusWith<CursorId> runQueryWithoutRetrying(OperationContext* txn, invariant(chunkManager); std::set<ShardId> shardIds; - chunkManager->getShardIdsForQuery(txn, + chunkManager->getShardIdsForQuery(opCtx, query.getQueryRequest().getFilter(), query.getQueryRequest().getCollation(), &shardIds); for (auto id : shardIds) { - auto shardStatus = shardRegistry->getShard(txn, id); + auto shardStatus = shardRegistry->getShard(opCtx, id); if (!shardStatus.isOK()) { return shardStatus.getStatus(); } @@ -187,7 +186,7 @@ StatusWith<CursorId> runQueryWithoutRetrying(OperationContext* txn, params.isTailable = query.getQueryRequest().isTailable(); params.isAwaitData = query.getQueryRequest().isAwaitData(); params.isAllowPartialResults = query.getQueryRequest().isAllowPartialResults(); - params.txn = txn; + params.txn = opCtx; // This is the batchSize passed to each subsequent getMore command issued by the cursor. We // usually use the batchSize associated with the initial find, but as it is illegal to send a @@ -232,7 +231,7 @@ StatusWith<CursorId> runQueryWithoutRetrying(OperationContext* txn, } auto ccc = ClusterClientCursorImpl::make( - Grid::get(txn)->getExecutorPool()->getArbitraryExecutor(), std::move(params)); + Grid::get(opCtx)->getExecutorPool()->getArbitraryExecutor(), std::move(params)); auto cursorState = ClusterCursorManager::CursorState::NotExhausted; int bytesBuffered = 0; @@ -287,7 +286,7 @@ StatusWith<CursorId> runQueryWithoutRetrying(OperationContext* txn, } // Register the cursor with the cursor manager. - auto cursorManager = Grid::get(txn)->getCursorManager(); + auto cursorManager = Grid::get(opCtx)->getCursorManager(); const auto cursorType = chunkManager ? 
ClusterCursorManager::CursorType::NamespaceSharded : ClusterCursorManager::CursorType::NamespaceNotSharded; const auto cursorLifetime = query.getQueryRequest().isNoCursorTimeout() @@ -301,7 +300,7 @@ StatusWith<CursorId> runQueryWithoutRetrying(OperationContext* txn, const size_t ClusterFind::kMaxStaleConfigRetries = 10; -StatusWith<CursorId> ClusterFind::runQuery(OperationContext* txn, +StatusWith<CursorId> ClusterFind::runQuery(OperationContext* opCtx, const CanonicalQuery& query, const ReadPreferenceSetting& readPref, std::vector<BSONObj>* results, @@ -317,26 +316,34 @@ StatusWith<CursorId> ClusterFind::runQuery(OperationContext* txn, << query.getQueryRequest().getProj()}; } + auto const catalogCache = Grid::get(opCtx)->catalogCache(); + // Re-target and re-send the initial find command to the shards until we have established the // shard version. for (size_t retries = 1; retries <= kMaxStaleConfigRetries; ++retries) { - auto scopedCMStatus = ScopedChunkManager::get(txn, query.nss()); - if (scopedCMStatus == ErrorCodes::NamespaceNotFound) { + auto routingInfoStatus = catalogCache->getCollectionRoutingInfo(opCtx, query.nss()); + if (routingInfoStatus == ErrorCodes::NamespaceNotFound) { // If the database doesn't exist, we successfully return an empty result set without // creating a cursor. 
return CursorId(0); - } else if (!scopedCMStatus.isOK()) { - return scopedCMStatus.getStatus(); + } else if (!routingInfoStatus.isOK()) { + return routingInfoStatus.getStatus(); } - const auto& scopedCM = scopedCMStatus.getValue(); + auto& routingInfo = routingInfoStatus.getValue(); - auto cursorId = runQueryWithoutRetrying( - txn, query, readPref, scopedCM.cm().get(), scopedCM.primary(), results, viewDefinition); + auto cursorId = runQueryWithoutRetrying(opCtx, + query, + readPref, + routingInfo.cm().get(), + routingInfo.primary(), + results, + viewDefinition); if (cursorId.isOK()) { return cursorId; } - auto status = std::move(cursorId.getStatus()); + + const auto& status = cursorId.getStatus(); if (!ErrorCodes::isStaleShardingError(status.code()) && status != ErrorCodes::ShardNotFound) { @@ -350,11 +357,7 @@ StatusWith<CursorId> ClusterFind::runQuery(OperationContext* txn, << " on attempt " << retries << " of " << kMaxStaleConfigRetries << ": " << redact(status); - if (status == ErrorCodes::StaleEpoch) { - Grid::get(txn)->catalogCache()->invalidate(query.nss().db().toString()); - } else { - scopedCM.db()->getChunkManagerIfExists(txn, query.nss().ns(), true); - } + catalogCache->onStaleConfigError(std::move(routingInfo)); } return {ErrorCodes::StaleShardVersion, @@ -362,11 +365,11 @@ StatusWith<CursorId> ClusterFind::runQuery(OperationContext* txn, << " times without successfully establishing shard version."}; } -StatusWith<CursorResponse> ClusterFind::runGetMore(OperationContext* txn, +StatusWith<CursorResponse> ClusterFind::runGetMore(OperationContext* opCtx, const GetMoreRequest& request) { - auto cursorManager = Grid::get(txn)->getCursorManager(); + auto cursorManager = Grid::get(opCtx)->getCursorManager(); - auto pinnedCursor = cursorManager->checkOutCursor(request.nss, request.cursorid, txn); + auto pinnedCursor = cursorManager->checkOutCursor(request.nss, request.cursorid, opCtx); if (!pinnedCursor.isOK()) { return pinnedCursor.getStatus(); } diff --git 
a/src/mongo/s/sharding_raii.cpp b/src/mongo/s/sharding_raii.cpp deleted file mode 100644 index ea50d5ce128..00000000000 --- a/src/mongo/s/sharding_raii.cpp +++ /dev/null @@ -1,159 +0,0 @@ -/** - * Copyright (C) 2016 MongoDB Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the GNU Affero General Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. 
- */ - -#include "mongo/platform/basic.h" - -#include "mongo/s/sharding_raii.h" - -#include "mongo/base/status_with.h" -#include "mongo/s/catalog/sharding_catalog_client.h" -#include "mongo/s/catalog_cache.h" -#include "mongo/s/chunk_manager.h" -#include "mongo/s/client/shard_registry.h" -#include "mongo/s/grid.h" - -namespace mongo { - -using std::shared_ptr; - -ScopedShardDatabase::ScopedShardDatabase(std::shared_ptr<DBConfig> db) : _db(db) { - invariant(_db); -} - -ScopedShardDatabase::~ScopedShardDatabase() = default; - -StatusWith<ScopedShardDatabase> ScopedShardDatabase::getExisting(OperationContext* txn, - StringData dbName) { - auto dbStatus = Grid::get(txn)->catalogCache()->getDatabase(txn, dbName.toString()); - if (!dbStatus.isOK()) { - return {dbStatus.getStatus().code(), - str::stream() << "Database " << dbName << " was not found due to " - << dbStatus.getStatus().toString()}; - } - - return {ScopedShardDatabase(std::move(dbStatus.getValue()))}; -} - -StatusWith<ScopedShardDatabase> ScopedShardDatabase::getOrCreate(OperationContext* txn, - StringData dbName) { - auto dbStatus = getExisting(txn, dbName); - if (dbStatus.isOK()) { - return dbStatus; - } - - if (dbStatus == ErrorCodes::NamespaceNotFound) { - auto statusCreateDb = - Grid::get(txn)->catalogClient(txn)->createDatabase(txn, dbName.toString()); - if (statusCreateDb.isOK() || statusCreateDb == ErrorCodes::NamespaceExists) { - return getExisting(txn, dbName); - } - - return statusCreateDb; - } - - return dbStatus.getStatus(); -} - -ScopedChunkManager::ScopedChunkManager(ScopedShardDatabase db, std::shared_ptr<ChunkManager> cm) - : _db(std::move(db)), _cm(std::move(cm)) {} - -ScopedChunkManager::ScopedChunkManager(ScopedShardDatabase db, std::shared_ptr<Shard> primary) - : _db(std::move(db)), _primary(std::move(primary)) {} - -ScopedChunkManager::~ScopedChunkManager() = default; - -StatusWith<ScopedChunkManager> ScopedChunkManager::get(OperationContext* txn, - const NamespaceString& nss) { - auto 
scopedDbStatus = ScopedShardDatabase::getExisting(txn, nss.db()); - if (!scopedDbStatus.isOK()) { - return scopedDbStatus.getStatus(); - } - - auto scopedDb = std::move(scopedDbStatus.getValue()); - - auto cm = scopedDb.db()->getChunkManagerIfExists(txn, nss.ns()); - if (cm) { - return {ScopedChunkManager(std::move(scopedDb), std::move(cm))}; - } - - auto shardStatus = - Grid::get(txn)->shardRegistry()->getShard(txn, scopedDb.db()->getPrimaryId()); - if (!shardStatus.isOK()) { - return {ErrorCodes::fromInt(40371), - str::stream() << "The primary shard for collection " << nss.ns() - << " could not be loaded due to error " - << shardStatus.getStatus().toString()}; - } - - return {ScopedChunkManager(std::move(scopedDb), std::move(shardStatus.getValue()))}; -} - -StatusWith<ScopedChunkManager> ScopedChunkManager::getOrCreate(OperationContext* txn, - const NamespaceString& nss) { - auto scopedDbStatus = ScopedShardDatabase::getOrCreate(txn, nss.db()); - if (!scopedDbStatus.isOK()) { - return scopedDbStatus.getStatus(); - } - - return ScopedChunkManager::get(txn, nss); -} - -StatusWith<ScopedChunkManager> ScopedChunkManager::refreshAndGet(OperationContext* txn, - const NamespaceString& nss) { - auto scopedDbStatus = ScopedShardDatabase::getExisting(txn, nss.db()); - if (!scopedDbStatus.isOK()) { - return scopedDbStatus.getStatus(); - } - - auto scopedDb = std::move(scopedDbStatus.getValue()); - - try { - std::shared_ptr<ChunkManager> cm = - scopedDb.db()->getChunkManager(txn, nss.ns(), true, false); - - if (!cm) { - return {ErrorCodes::NamespaceNotSharded, - str::stream() << "Collection " << nss.ns() - << " does not exist or is not sharded."}; - } - - if (cm->getChunkMap().empty()) { - return {ErrorCodes::NamespaceNotSharded, - str::stream() << "Collection " << nss.ns() - << " is marked as sharded, but does not have any chunks. 
This " - "most likely indicates a corrupted metadata or " - "partially completed 'shardCollection' command."}; - } - - return {ScopedChunkManager(std::move(scopedDb), std::move(cm))}; - } catch (const AssertionException& e) { - return e.toStatus(); - } -} - -} // namespace mongo diff --git a/src/mongo/s/sharding_raii.h b/src/mongo/s/sharding_raii.h deleted file mode 100644 index 92d5858f36b..00000000000 --- a/src/mongo/s/sharding_raii.h +++ /dev/null @@ -1,152 +0,0 @@ -/** - * Copyright (C) 2016 MongoDB Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the GNU Affero General Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. 
- */ - -#pragma once - -#include "mongo/base/disallow_copying.h" -#include "mongo/s/chunk_manager.h" -#include "mongo/s/config.h" - -namespace mongo { - -class OperationContext; - -class ScopedShardDatabase { - MONGO_DISALLOW_COPYING(ScopedShardDatabase); - -public: - ScopedShardDatabase(ScopedShardDatabase&&) = default; - ~ScopedShardDatabase(); - - /** - * Ensures that the specified database exists in the cache and if it does, returns it. - * Otherwise, either returns NamespaceNotFound if the database does not exist, or any other - * error code indicating why the database could not be loaded. - */ - static StatusWith<ScopedShardDatabase> getExisting(OperationContext* txn, StringData dbName); - - /** - * If the specified database exists already, loads it in the cache (if not already there) and - * returns it. Otherwise, if it does not exis, this call will implicitly create it as - * non-sharded. - */ - static StatusWith<ScopedShardDatabase> getOrCreate(OperationContext* txn, StringData dbName); - - /** - * Returns the underlying database cache entry. - */ - DBConfig* db() const { - return _db.get(); - } - - /** - * This method is here only for compatibility with the legacy M/R code, which requires a shared - * reference to the underlying database. It should not be used in new code. - */ - std::shared_ptr<DBConfig> getSharedDbReference() const { - return _db; - } - -private: - explicit ScopedShardDatabase(std::shared_ptr<DBConfig> db); - - // Reference to the corresponding database. Never null. - std::shared_ptr<DBConfig> _db; -}; - -class ScopedChunkManager { - MONGO_DISALLOW_COPYING(ScopedChunkManager); - -public: - ScopedChunkManager(ScopedChunkManager&&) = default; - ~ScopedChunkManager(); - - /** - * If the specified namespace is sharded, returns a ScopedChunkManager initialized with that - * collection's routing information. If it is not, the object returned is initialized with the - * database primary node on which the unsharded collection must reside. 
- * - * Returns NamespaceNotFound if the database does not exist, or any other error indicating - * problem communicating with the config server. - */ - static StatusWith<ScopedChunkManager> get(OperationContext* txn, const NamespaceString& nss); - - /** - * If the database holding the specified namespace does not exist, creates it and then behaves - * like the 'get' method above. - */ - static StatusWith<ScopedChunkManager> getOrCreate(OperationContext* txn, - const NamespaceString& nss); - - /** - * If the specified database and collection do not exist in the cache, tries to load them from - * the config server and returns a reference. If they are already in the cache, makes a call to - * the config server to check if there are any incremental updates to the collection chunk - * metadata and if so incorporates those. Otherwise, if it does not exist or any other error - * occurs, passes that error back. - */ - static StatusWith<ScopedChunkManager> refreshAndGet(OperationContext* txn, - const NamespaceString& nss); - - /** - * Returns the underlying database for which we hold reference. - */ - DBConfig* db() const { - return _db.db(); - } - - /** - * If the collection is sharded, returns a chunk manager for it. Otherwise, nullptr. - */ - std::shared_ptr<ChunkManager> cm() const { - return _cm; - } - - /** - * If the collection is not sharded, returns its primary shard. Otherwise, nullptr. - */ - std::shared_ptr<Shard> primary() const { - return _primary; - } - -private: - ScopedChunkManager(ScopedShardDatabase db, std::shared_ptr<ChunkManager> cm); - - ScopedChunkManager(ScopedShardDatabase db, std::shared_ptr<Shard> primary); - - // Scoped reference to the owning database. - ScopedShardDatabase _db; - - // Reference to the corresponding chunk manager (if sharded) or null - std::shared_ptr<ChunkManager> _cm; - - // Reference to the primary of the database (if not sharded) or null - std::shared_ptr<Shard> _primary; -}; - -} // namespace mongo |