/*
* Copyright (C) 2010 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the GNU Affero General Public License in all respects
* for all of the code used other than as permitted herein. If you modify
* file(s) with this exception, you may extend this exception to your
* version of the file(s), but you are not obligated to do so. If you do not
* wish to do so, delete this exception statement from your version. If you
* delete this exception statement from all source files in the program,
* then also delete it in the license file.
*/
#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding
#include "mongo/platform/basic.h"
#include "mongo/s/strategy.h"
#include "mongo/base/data_cursor.h"
#include "mongo/base/owned_pointer_vector.h"
#include "mongo/base/status.h"
#include "mongo/bson/util/builder.h"
#include "mongo/bson/util/bson_extract.h"
#include "mongo/client/connpool.h"
#include "mongo/client/dbclientcursor.h"
#include "mongo/client/parallel.h"
#include "mongo/db/audit.h"
#include "mongo/db/auth/action_type.h"
#include "mongo/db/auth/authorization_session.h"
#include "mongo/db/commands.h"
#include "mongo/db/max_time.h"
#include "mongo/db/server_parameters.h"
#include "mongo/db/matcher/extensions_callback_noop.h"
#include "mongo/db/namespace_string.h"
#include "mongo/db/query/find_common.h"
#include "mongo/db/query/lite_parsed_query.h"
#include "mongo/db/query/getmore_request.h"
#include "mongo/db/stats/counters.h"
#include "mongo/rpc/metadata/server_selection_metadata.h"
#include "mongo/s/bson_serializable.h"
#include "mongo/s/catalog/catalog_cache.h"
#include "mongo/s/client/shard_connection.h"
#include "mongo/s/client/shard_registry.h"
#include "mongo/s/client/version_manager.h"
#include "mongo/s/cluster_explain.h"
#include "mongo/s/chunk_manager.h"
#include "mongo/s/chunk_version.h"
#include "mongo/s/config.h"
#include "mongo/s/grid.h"
#include "mongo/s/query/cluster_cursor_manager.h"
#include "mongo/s/query/cluster_find.h"
#include "mongo/s/request.h"
#include "mongo/s/stale_exception.h"
#include "mongo/s/write_ops/batched_command_request.h"
#include "mongo/s/write_ops/batch_upconvert.h"
#include "mongo/util/log.h"
#include "mongo/util/mongoutils/str.h"
#include "mongo/util/timer.h"
namespace mongo {
using std::unique_ptr;
using std::shared_ptr;
using std::set;
using std::string;
using std::stringstream;
using std::vector;
namespace {
/**
 * Runs the command described by 'jsobj' against the registered command table.
 *
 * The command name is taken from the first element of 'jsobj'. When no such command is
 * registered (or the first element has a zero type), a failed command status plus a
 * CommandNotFound "code" field are appended to 'anObjBuilder' and the unknown-command counter
 * is incremented. Otherwise the command is executed through Command::execCommandClientBasic.
 *
 * uasserts with code 16618 if the collection part of 'ns' is not "$cmd".
 */
void runAgainstRegistered(OperationContext* txn,
                          const char* ns,
                          BSONObj& jsobj,
                          BSONObjBuilder& anObjBuilder,
                          int queryOptions) {
    // Callers are only expected to hand "$cmd" namespaces to this function, so this check is
    // not expected to ever fail.
    uassert(16618,
            "Illegal attempt to run a command against a namespace other than $cmd.",
            nsToCollectionSubstring(ns) == "$cmd");

    const BSONElement firstElem = jsobj.firstElement();
    const std::string commandName = firstElem.fieldName();

    // Only look the command up when the first element has a non-zero type.
    Command* command = nullptr;
    if (firstElem.type()) {
        command = Command::findCommand(commandName);
    }

    if (command) {
        Command::execCommandClientBasic(
            txn, command, cc(), queryOptions, ns, jsobj, anObjBuilder);
        return;
    }

    // Unknown command: report the failure in the reply and count it.
    Command::appendCommandStatus(
        anObjBuilder, false, str::stream() << "no such cmd: " << commandName);
    anObjBuilder.append("code", ErrorCodes::CommandNotFound);
    Command::unknownCommands.increment();
}
} // namespace
/**
 * Handles a query request that targets a regular (non-command) namespace.
 *
 * Steps, in order:
 *  - checks (and audits) find authorization for the namespace;
 *  - rejects command namespaces and the 'exhaust' query option;
 *  - derives the read preference from the slaveOk flag, overridden by a wrapped read preference
 *    field in the query when one is present;
 *  - canonicalizes the query;
 *  - if the query is flagged as an explain, runs it as an explain of the equivalent find command
 *    and replies with that single document;
 *  - otherwise runs the query through ClusterFind::runQuery and replies with the first batch and
 *    the returned cursor id.
 *
 * Errors are reported by throwing (uassert / uassertStatusOK / UserException).
 */
void Strategy::queryOp(OperationContext* txn, Request& request) {
    verify(!NamespaceString(request.getns()).isCommand());

    globalOpCounters.gotQuery();

    QueryMessage q(request.d());

    NamespaceString ns(q.ns);
    ClientBasic* client = txn->getClient();
    AuthorizationSession* authSession = AuthorizationSession::get(client);
    Status status = authSession->checkAuthForFind(ns, false);
    // The authorization check is audited before a failure is turned into an exception.
    audit::logQueryAuthzCheck(client, ns, q.query, status.code());
    uassertStatusOK(status);

    LOG(3) << "query: " << q.ns << " " << q.query << " ntoreturn: " << q.ntoreturn
           << " options: " << q.queryOptions;

    if (q.ntoreturn == 1 && strstr(q.ns, ".$cmd"))
        throw UserException(8010, "something is wrong, shouldn't see a command here");

    if (q.queryOptions & QueryOption_Exhaust) {
        uasserted(18526,
                  string("the 'exhaust' query option is invalid for mongos queries: ") + q.ns +
                      " " + q.query.toString());
    }

    // Determine the default read preference mode based on the value of the slaveOk flag.
    ReadPreference readPreferenceOption = (q.queryOptions & QueryOption_SlaveOk)
        ? ReadPreference::SecondaryPreferred
        : ReadPreference::PrimaryOnly;
    ReadPreferenceSetting readPreference(readPreferenceOption, TagSet());

    // An explicit read preference embedded in the query overrides the default. A missing field is
    // fine; any other extraction error (e.g. wrong type) is reported to the client.
    BSONElement rpElem;
    auto readPrefExtractStatus = bsonExtractTypedField(
        q.query, LiteParsedQuery::kWrappedReadPrefField, mongo::Object, &rpElem);

    if (readPrefExtractStatus.isOK()) {
        auto parsedRps = ReadPreferenceSetting::fromBSON(rpElem.Obj());
        uassertStatusOK(parsedRps.getStatus());
        readPreference = parsedRps.getValue();
    } else if (readPrefExtractStatus != ErrorCodes::NoSuchKey) {
        uassertStatusOK(readPrefExtractStatus);
    }

    auto canonicalQuery = CanonicalQuery::canonicalize(q, ExtensionsCallbackNoop());
    uassertStatusOK(canonicalQuery.getStatus());

    const LiteParsedQuery& lpq = canonicalQuery.getValue()->getParsed();

    // If the $explain flag was set, we must run the operation on the shards as an explain command
    // rather than a find command.
    if (lpq.isExplain()) {
        BSONObj findCommand = lpq.asFindCommand();

        // We default to allPlansExecution verbosity.
        auto verbosity = ExplainCommon::EXEC_ALL_PLANS;

        const bool secondaryOk = (readPreference.pref != ReadPreference::PrimaryOnly);
        rpc::ServerSelectionMetadata metadata(secondaryOk, readPreference);

        BSONObjBuilder explainBuilder;
        uassertStatusOK(
            Strategy::explainFind(txn, findCommand, lpq, verbosity, metadata, &explainBuilder));

        BSONObj explainObj = explainBuilder.done();
        replyToQuery(0,  // query result flags
                     request.p(),
                     request.m(),
                     static_cast<const void*>(explainObj.objdata()),
                     explainObj.objsize(),
                     1,  // numResults
                     0,  // startingFrom
                     CursorId(0));
        return;
    }

    // Do the work to generate the first batch of results. This blocks waiting to get responses from
    // the shard(s).
    std::vector<BSONObj> batch;

    // 0 means the cursor is exhausted. Otherwise we assume that a cursor with the returned id can
    // be retrieved via the ClusterCursorManager.
    auto cursorId = ClusterFind::runQuery(txn, *canonicalQuery.getValue(), readPreference, &batch);
    uassertStatusOK(cursorId.getStatus());

    // Fill out the response buffer.
    int numResults = 0;
    OpQueryReplyBuilder reply;
    for (auto&& obj : batch) {
        obj.appendSelfToBufBuilder(reply.bufBuilderForResults());
        numResults++;
    }

    reply.send(request.p(),
               0,  // query result flags
               request.m(),
               numResults,
               0,  // startingFrom
               cursorId.getValue());
}
/**
 * Handles a query message addressed to a command namespace.
 *
 * The 'exhaust' option is rejected, legacy ".$cmd.sys." pseudo-commands are delegated to
 * handleSpecialNamespaces(), and everything else is run through the registered command table.
 *
 * A StaleConfigException triggers a bounded retry: the command is re-run up to five more times
 * after refreshing this connection's shard versions. Once the retries are used up the original
 * exception is rethrown. Any other DBException is converted into a command error reply instead
 * of propagating.
 */
void Strategy::clientCommandOp(OperationContext* txn, Request& request) {
    QueryMessage q(request.d());

    LOG(3) << "command: " << q.ns << " " << q.query << " ntoreturn: " << q.ntoreturn
           << " options: " << q.queryOptions;

    if (q.queryOptions & QueryOption_Exhaust) {
        uasserted(18527,
                  string("the 'exhaust' query option is invalid for mongos commands: ") + q.ns +
                      " " + q.query.toString());
    }

    NamespaceString nss(request.getns());
    // Only command namespaces are expected here; regular queries go through queryOp().
    verify(nss.isCommand() || nss.isSpecialCommand());

    if (handleSpecialNamespaces(txn, request, q))
        return;

    // Number of retries still allowed after a StaleConfigException.
    int loops = 5;

    while (true) {
        try {
            BSONObj cmdObj = q.query;
            {
                // If the command is wrapped as {query: {...}} or {$query: {...}}, unwrap it.
                BSONElement e = cmdObj.firstElement();
                if (e.type() == Object &&
                    (e.fieldName()[0] == '$' ? str::equals("query", e.fieldName() + 1)
                                             : str::equals("query", e.fieldName()))) {
                    // Extract the embedded query object.
                    if (cmdObj.hasField(Query::ReadPrefField.name())) {
                        // The command has a read preference setting. We don't want
                        // to lose this information so we copy this to a new field
                        // called $queryOptions.$readPreference
                        BSONObjBuilder finalCmdObjBuilder;
                        finalCmdObjBuilder.appendElements(e.embeddedObject());

                        BSONObjBuilder queryOptionsBuilder(
                            finalCmdObjBuilder.subobjStart("$queryOptions"));
                        queryOptionsBuilder.append(cmdObj[Query::ReadPrefField.name()]);
                        queryOptionsBuilder.done();

                        cmdObj = finalCmdObjBuilder.obj();
                    } else {
                        cmdObj = e.embeddedObject();
                    }
                }
            }

            OpQueryReplyBuilder reply;
            {
                // The builder writes straight into the reply buffer; it must be finished (go out
                // of scope) before the reply is sent.
                BSONObjBuilder builder(reply.bufBuilderForResults());
                runAgainstRegistered(txn, q.ns, cmdObj, builder, q.queryOptions);
            }
            reply.sendCommandReply(request.p(), request.m());
            return;
        } catch (const StaleConfigException& e) {
            if (loops <= 0) {
                // Rethrow the in-flight exception object. 'throw e;' would copy it by its static
                // type and slice off any derived exception type.
                throw;
            }

            loops--;
            log() << "retrying command: " << q.query;

            // For legacy reasons, ns may not actually be set in the exception :-(
            string staleNS = e.getns();
            if (staleNS.size() == 0)
                staleNS = q.ns;

            ShardConnection::checkMyConnectionVersions(txn, staleNS);
            // From the second retry onwards, additionally force a remote shard version check.
            if (loops < 4)
                versionManager.forceRemoteCheckShardVersionCB(txn, staleNS);
        } catch (const DBException& e) {
            // Report the failure to the client as a command error reply.
            OpQueryReplyBuilder reply;
            {
                BSONObjBuilder builder(reply.bufBuilderForResults());
                Command::appendCommandStatus(builder, e.toStatus());
            }
            reply.sendCommandReply(request.p(), request.m());
            return;
        }
    }
}
// TODO: remove after MongoDB 3.2
/**
 * Services the legacy pseudo-commands addressed through a ".$cmd.sys.<name>" namespace.
 *
 * "inprog" and "killop" are rewritten as the real 'currentOp' / 'killOp' commands and run
 * against the 'admin' database; "unlock" is answered with an error document. In those cases a
 * reply is sent and true is returned.
 *
 * Returns false, without replying, when the namespace has no ".$cmd.sys." component or names an
 * unrecognized pseudo-command (the latter also logs a warning).
 */
bool Strategy::handleSpecialNamespaces(OperationContext* txn, Request& request, QueryMessage& q) {
    const char* const marker = strstr(request.getns(), ".$cmd.sys.");
    if (marker == nullptr) {
        return false;
    }

    // Step over the 10 characters of ".$cmd.sys." to reach the pseudo-command name.
    const char* const pseudoCommand = marker + 10;

    const bool isInprog = (strcmp(pseudoCommand, "inprog") == 0);
    const bool isKillop = !isInprog && (strcmp(pseudoCommand, "killop") == 0);
    const bool isUnlock = !isInprog && !isKillop && (strcmp(pseudoCommand, "unlock") == 0);

    if (!(isInprog || isKillop || isUnlock)) {
        warning() << "unknown sys command [" << pseudoCommand << "]";
        return false;
    }

    BSONObjBuilder reply;

    if (isUnlock) {
        reply.append("err", "can't do unlock through mongos");
    } else {
        const StringData realCommandName = isInprog ? "currentOp" : "killOp";

        // Build {<realCommandName>: 1, <fields of the original query>...}.
        BSONObjBuilder upgradedCmdBuilder;
        upgradedCmdBuilder.append(realCommandName, 1);
        upgradedCmdBuilder.appendElements(q.query);  // fields are validated by Commands
        BSONObj upgradedCmd = upgradedCmdBuilder.done();

        // Rewrite upgraded pseudoCommands to run on the 'admin' database.
        NamespaceString adminCmdNss("admin", "$cmd");
        runAgainstRegistered(
            txn, adminCmdNss.ns().c_str(), upgradedCmd, reply, q.queryOptions);
    }

    BSONObj replyObj = reply.done();
    replyToQuery(0, request.p(), request.m(), replyObj);
    return true;
}
void Strategy::commandOp(OperationContext* txn,
const string& db,
const BSONObj& command,
int options,
const string& versionedNS,
const BSONObj& targetingQuery,
vector* results) {
QuerySpec qSpec(db + ".$cmd", command, BSONObj(), 0, 1, options);
ParallelSortClusteredCursor cursor(qSpec, CommandInfo(versionedNS, targetingQuery));
// Initialize the cursor
cursor.init(txn);
set shardIds;
cursor.getQueryShardIds(shardIds);
for (const ShardId& shardId : shardIds) {
CommandResult result;
result.shardTargetId = shardId;
result.target = fassertStatusOK(
34417, ConnectionString::parse(cursor.getShardCursor(shardId)->originalHost()));
result.result = cursor.getShardCursor(shardId)->peekFirst().getOwned();
results->push_back(result);
}
}
/**
 * Handles a get-more request: pulls 'ntoreturn' and the cursor id from the message, fetches the
 * next batch through ClusterFind::runGetMore, and replies with the batch.
 *
 * A reply flagged ResultFlag_CursorNotFound (with no documents) is sent when the database
 * lookup reports NamespaceNotFound or when the get-more reports CursorNotFound.
 *
 * uasserts with code 34424 on a negative 'ntoreturn'; other failures from the database lookup or
 * the get-more are rethrown via uassertStatusOK.
 */
void Strategy::getMore(OperationContext* txn, Request& request) {
    const char* ns = request.getns();
    const int ntoreturn = request.d().pullInt();
    uassert(
        34424, str::stream() << "Invalid ntoreturn for OP_GET_MORE: " << ntoreturn, ntoreturn >= 0);
    const long long id = request.d().pullInt64();

    // TODO: Handle stale config exceptions here from coll being dropped or sharded during op for
    // now has same semantics as legacy request.
    const NamespaceString nss(ns);
    auto statusGetDb = grid.catalogCache()->getDatabase(txn, nss.db().toString());
    if (statusGetDb == ErrorCodes::NamespaceNotFound) {
        replyToQuery(ResultFlag_CursorNotFound, request.p(), request.m(), 0, 0, 0);
        return;
    }
    uassertStatusOK(statusGetDb);

    // An ntoreturn of zero leaves the batch size unset.
    boost::optional<long long> batchSize;
    if (ntoreturn) {
        batchSize = ntoreturn;
    }
    GetMoreRequest getMoreRequest(nss, id, batchSize, boost::none, boost::none, boost::none);

    auto cursorResponse = ClusterFind::runGetMore(txn, getMoreRequest);
    if (cursorResponse == ErrorCodes::CursorNotFound) {
        replyToQuery(ResultFlag_CursorNotFound, request.p(), request.m(), 0, 0, 0);
        return;
    }
    uassertStatusOK(cursorResponse.getStatus());

    // Build the response document.
    BufBuilder buffer(FindCommon::kInitReplyBufferSize);

    int numResults = 0;
    for (const auto& obj : cursorResponse.getValue().getBatch()) {
        buffer.appendBuf(obj.objdata(), obj.objsize());
        ++numResults;
    }

    replyToQuery(0,
                 request.p(),
                 request.m(),
                 buffer.buf(),
                 buffer.len(),
                 numResults,
                 cursorResponse.getValue().getNumReturnedSoFar().value_or(0),
                 cursorResponse.getValue().getCursorId());
}
/**
 * Handles a kill-cursors request: validates the message, then for each listed cursor id looks up
 * its namespace, checks (and audits) authorization, and kills the cursor.
 *
 * Cursors that cannot be found, that the client is not authorized to kill, or whose kill fails
 * are skipped with a level-3 log line; they do not abort processing of the remaining ids.
 *
 * masserts with code 34425 if the message size does not match 'numCursors', and uasserts with
 * code 28794 if 'numCursors' is outside [1, 29999].
 */
void Strategy::killCursors(OperationContext* txn, Request& request) {
    DbMessage& dbMessage = request.d();
    const int numCursors = dbMessage.pullInt();

    // 'numCursors' comes straight off the wire and has not been range-checked yet, so the
    // expected size is computed in 64 bits; the int expression 8 + 8 * numCursors could overflow.
    // Each cursor id occupies 8 bytes; the fixed 8 bytes are presumably the two int32 header
    // fields that precede the id array -- confirm against the wire protocol.
    const long long expectedDataSize = 8LL + 8LL * static_cast<long long>(numCursors);
    massert(34425,
            str::stream() << "Invalid killCursors message. numCursors: " << numCursors
                          << ", message size: " << dbMessage.msg().dataSize() << ".",
            static_cast<long long>(dbMessage.msg().dataSize()) == expectedDataSize);
    uassert(28794,
            str::stream() << "numCursors must be between 1 and 29999.  numCursors: " << numCursors
                          << ".",
            numCursors >= 1 && numCursors < 30000);

    ConstDataCursor cursors(dbMessage.getArray(numCursors));

    Client* client = txn->getClient();
    AuthorizationSession* authSession = AuthorizationSession::get(client);
    ClusterCursorManager* manager = grid.getCursorManager();

    for (int i = 0; i < numCursors; ++i) {
        // Cursor ids are read as little-endian 64-bit integers.
        CursorId cursorId = cursors.readAndAdvance<LittleEndian<int64_t>>();

        boost::optional<NamespaceString> nss = manager->getNamespaceForCursorId(cursorId);
        if (!nss) {
            LOG(3) << "Can't find cursor to kill.  Cursor id: " << cursorId << ".";
            continue;
        }

        Status authorizationStatus = authSession->checkAuthForKillCursors(*nss, cursorId);
        // Any authorization failure is audited as Unauthorized, whatever its original code.
        audit::logKillCursorsAuthzCheck(client,
                                        *nss,
                                        cursorId,
                                        authorizationStatus.isOK() ? ErrorCodes::OK
                                                                   : ErrorCodes::Unauthorized);
        if (!authorizationStatus.isOK()) {
            LOG(3) << "Not authorized to kill cursor.  Namespace: '" << *nss
                   << "', cursor id: " << cursorId << ".";
            continue;
        }

        Status killCursorStatus = manager->killCursor(*nss, cursorId);
        if (!killCursorStatus.isOK()) {
            LOG(3) << "Can't find cursor to kill.  Namespace: '" << *nss
                   << "', cursor id: " << cursorId << ".";
            continue;
        }

        LOG(3) << "Killed cursor.  Namespace: '" << *nss << "', cursor id: " << cursorId << ".";
    }
}
/**
 * Handles a legacy write message by converting it into one or more batched write command
 * requests, running each through the registered command table, and recording the outcome in the
 * client's LastError object.
 *
 * Processing stops early when an ordered batch reports an error. The 'op' parameter is not used
 * by this implementation.
 */
void Strategy::writeOp(OperationContext* txn, int op, Request& request) {
    // make sure we have a last error
    dassert(&LastError::get(cc()));

    // The owning vector deletes the converted requests when it goes out of scope.
    OwnedPointerVector<BatchedCommandRequest> commandRequestsOwned;
    vector<BatchedCommandRequest*>& commandRequests = commandRequestsOwned.mutableVector();

    msgToBatchRequests(request.m(), &commandRequests);

    for (auto it = commandRequests.begin(); it != commandRequests.end(); ++it) {
        // Multiple commands registered to last error as multiple requests
        if (it != commandRequests.begin())
            LastError::get(cc()).startRequest();

        BatchedCommandRequest* commandRequest = *it;

        // Adjust namespaces for command
        NamespaceString fullNS(commandRequest->getNS());
        string cmdNS = fullNS.getCommandNS();
        // The request keeps its full namespace; the command itself is run against 'cmdNS'.
        commandRequest->setNS(fullNS);

        BSONObjBuilder builder;
        BSONObj requestBSON = commandRequest->toBSON();

        {
            // Disable the last error object for the duration of the write cmd
            LastError::Disabled disableLastError(&LastError::get(cc()));
            runAgainstRegistered(txn, cmdNS.c_str(), requestBSON, builder, 0);
        }

        BatchedCommandResponse commandResponse;
        bool parsed = commandResponse.parseBSON(builder.done(), nullptr);
        (void)parsed;  // for compile
        dassert(parsed && commandResponse.isValid(nullptr));

        // Populate the lastError object based on the write response
        LastError::get(cc()).reset();
        bool hadError =
            batchErrorToLastError(*commandRequest, commandResponse, &LastError::get(cc()));

        // Check if this is an ordered batch and we had an error which should stop processing
        if (commandRequest->getOrdered() && hadError)
            break;
    }
}
/**
 * Runs 'findCommand' on the shards as an explain command and writes the combined explain output
 * to 'out'.
 *
 * @param txn                      operation context.
 * @param findCommand              the find command to be explained.
 * @param lpq                      parsed form of the query; supplies the namespace and the
 *                                 filter used for shard targeting.
 * @param verbosity                explain verbosity to request.
 * @param serverSelectionMetadata  server selection metadata passed to the explain wrapper.
 * @param out                      builder that receives the explain result.
 * @return the status produced by ClusterExplain::buildExplainResult.
 */
Status Strategy::explainFind(OperationContext* txn,
                             const BSONObj& findCommand,
                             const LiteParsedQuery& lpq,
                             ExplainCommon::Verbosity verbosity,
                             const rpc::ServerSelectionMetadata& serverSelectionMetadata,
                             BSONObjBuilder* out) {
    // Wrap the find command as an explain; the wrapper also fills in the query options to use.
    BSONObjBuilder explainCmdBob;
    int options = 0;
    ClusterExplain::wrapAsExplain(
        findCommand, verbosity, serverSelectionMetadata, &explainCmdBob, &options);

    // We will time how long it takes to run the commands on the shards.
    Timer timer;

    std::vector<Strategy::CommandResult> shardResults;
    Strategy::commandOp(txn,
                        lpq.nss().db().toString(),
                        explainCmdBob.obj(),
                        options,
                        lpq.nss().toString(),
                        lpq.getFilter(),
                        &shardResults);

    long long millisElapsed = timer.millis();

    const char* mongosStageName = ClusterExplain::getStageNameForReadOp(shardResults, findCommand);

    return ClusterExplain::buildExplainResult(
        txn, shardResults, mongosStageName, millisElapsed, out);
}
}