summaryrefslogtreecommitdiff
path: root/src/mongo/db/ops/query.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/db/ops/query.cpp')
-rw-r--r--src/mongo/db/ops/query.cpp870
1 files changed, 870 insertions, 0 deletions
diff --git a/src/mongo/db/ops/query.cpp b/src/mongo/db/ops/query.cpp
new file mode 100644
index 00000000000..15e3ed9053f
--- /dev/null
+++ b/src/mongo/db/ops/query.cpp
@@ -0,0 +1,870 @@
+// query.cpp
+
+/**
+ * Copyright (C) 2008 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "pch.h"
+#include "query.h"
+#include "../pdfile.h"
+#include "../jsobjmanipulator.h"
+#include "../../bson/util/builder.h"
+#include <time.h>
+#include "../introspect.h"
+#include "../btree.h"
+#include "../../util/lruishmap.h"
+#include "../json.h"
+#include "../repl.h"
+#include "../replutil.h"
+#include "../scanandorder.h"
+#include "../security.h"
+#include "../curop-inl.h"
+#include "../commands.h"
+#include "../queryoptimizer.h"
+#include "../lasterror.h"
+#include "../../s/d_logic.h"
+#include "../repl_block.h"
+#include "../../server.h"
+#include "../d_concurrency.h"
+
+namespace mongo {
+
+ /* We cut off further objects once we cross this threshold; thus, you might get
+ a little bit more than this, it is a threshold rather than a limit.
+ */
+ const int MaxBytesToReturnToClientAtOnce = 4 * 1024 * 1024;
+
+ //ns->query->DiskLoc
+// LRUishMap<BSONObj,DiskLoc,5> lrutest(123);
+
+ extern bool useCursors;
+ extern bool useHints;
+
+ bool runCommands(const char *ns, BSONObj& jsobj, CurOp& curop, BufBuilder &b, BSONObjBuilder& anObjBuilder, bool fromRepl, int queryOptions) {
+ try {
+ return _runCommands(ns, jsobj, b, anObjBuilder, fromRepl, queryOptions);
+ }
+ catch( SendStaleConfigException& ){
+ throw;
+ }
+ catch ( AssertionException& e ) {
+ assert( e.getCode() != SendStaleConfigCode && e.getCode() != RecvStaleConfigCode );
+
+ e.getInfo().append( anObjBuilder , "assertion" , "assertionCode" );
+ curop.debug().exceptionInfo = e.getInfo();
+ }
+ anObjBuilder.append("errmsg", "db assertion failure");
+ anObjBuilder.append("ok", 0.0);
+ BSONObj x = anObjBuilder.done();
+ b.appendBuf((void*) x.objdata(), x.objsize());
+ return true;
+ }
+
+
+ BSONObj id_obj = fromjson("{\"_id\":1}");
+ BSONObj empty_obj = fromjson("{}");
+
+
+ //int dump = 0;
+
+ /* empty result for error conditions */
+ QueryResult* emptyMoreResult(long long cursorid) {
+ BufBuilder b(32768);
+ b.skip(sizeof(QueryResult));
+ QueryResult *qr = (QueryResult *) b.buf();
+ qr->cursorId = 0; // 0 indicates no more data to retrieve.
+ qr->startingFrom = 0;
+ qr->len = b.len();
+ qr->setOperation(opReply);
+ qr->initializeResultFlags();
+ qr->nReturned = 0;
+ b.decouple();
+ return qr;
+ }
+
+ QueryResult* processGetMore(const char *ns, int ntoreturn, long long cursorid , CurOp& curop, int pass, bool& exhaust ) {
+ exhaust = false;
+ ClientCursor::Pointer p(cursorid);
+ ClientCursor *cc = p.c();
+
+ int bufSize = 512 + sizeof( QueryResult ) + MaxBytesToReturnToClientAtOnce;
+
+ BufBuilder b( bufSize );
+ b.skip(sizeof(QueryResult));
+ int resultFlags = ResultFlag_AwaitCapable;
+ int start = 0;
+ int n = 0;
+
+ if ( unlikely(!cc) ) {
+ LOGSOME << "getMore: cursorid not found " << ns << " " << cursorid << endl;
+ cursorid = 0;
+ resultFlags = ResultFlag_CursorNotFound;
+ }
+ else {
+ // check for spoofing of the ns such that it does not match the one originally there for the cursor
+ uassert(14833, "auth error", str::equals(ns, cc->ns().c_str()));
+
+ if ( pass == 0 )
+ cc->updateSlaveLocation( curop );
+
+ int queryOptions = cc->queryOptions();
+
+ curop.debug().query = cc->query();
+
+ start = cc->pos();
+ Cursor *c = cc->c();
+ c->checkLocation();
+ DiskLoc last;
+
+ scoped_ptr<Projection::KeyOnly> keyFieldsOnly;
+ if ( cc->modifiedKeys() == false && cc->isMultiKey() == false && cc->fields )
+ keyFieldsOnly.reset( cc->fields->checkKey( cc->indexKeyPattern() ) );
+
+ // This manager may be stale, but it's the state of chunking when the cursor was created.
+ ShardChunkManagerPtr manager = cc->getChunkManager();
+
+ while ( 1 ) {
+ if ( !c->ok() ) {
+ if ( c->tailable() ) {
+ /* when a tailable cursor hits "EOF", ok() goes false, and current() is null. however
+ advance() can still be retries as a reactivation attempt. when there is new data, it will
+ return true. that's what we are doing here.
+ */
+ if ( c->advance() )
+ continue;
+
+ if( n == 0 && (queryOptions & QueryOption_AwaitData) && pass < 1000 ) {
+ return 0;
+ }
+
+ break;
+ }
+ p.release();
+ bool ok = ClientCursor::erase(cursorid);
+ assert(ok);
+ cursorid = 0;
+ cc = 0;
+ break;
+ }
+
+ // in some cases (clone collection) there won't be a matcher
+ if ( c->matcher() && !c->matcher()->matchesCurrent( c ) ) {
+ }
+ else if ( manager && ! manager->belongsToMe( cc ) ){
+ LOG(2) << "cursor skipping document in un-owned chunk: " << c->current() << endl;
+ }
+ else {
+ if( c->getsetdup(c->currLoc()) ) {
+ //out() << " but it's a dup \n";
+ }
+ else {
+ last = c->currLoc();
+ n++;
+
+ if ( keyFieldsOnly ) {
+ fillQueryResultFromObj(b, 0, keyFieldsOnly->hydrate( c->currKey() ) );
+ }
+ else {
+ BSONObj js = c->current();
+ // show disk loc should be part of the main query, not in an $or clause, so this should be ok
+ fillQueryResultFromObj(b, cc->fields.get(), js, ( cc->pq.get() && cc->pq->showDiskLoc() ? &last : 0));
+ }
+
+ if ( ( ntoreturn && n >= ntoreturn ) || b.len() > MaxBytesToReturnToClientAtOnce ) {
+ c->advance();
+ cc->incPos( n );
+ break;
+ }
+ }
+ }
+ c->advance();
+
+ if ( ! cc->yieldSometimes( ClientCursor::MaybeCovered ) ) {
+ ClientCursor::erase(cursorid);
+ cursorid = 0;
+ cc = 0;
+ p.deleted();
+ break;
+ }
+ }
+
+ if ( cc ) {
+ cc->updateLocation();
+ cc->mayUpgradeStorage();
+ cc->storeOpForSlave( last );
+ exhaust = cc->queryOptions() & QueryOption_Exhaust;
+ }
+ }
+
+ QueryResult *qr = (QueryResult *) b.buf();
+ qr->len = b.len();
+ qr->setOperation(opReply);
+ qr->_resultFlags() = resultFlags;
+ qr->cursorId = cursorid;
+ qr->startingFrom = start;
+ qr->nReturned = n;
+ b.decouple();
+
+ return qr;
+ }
+
+ class ExplainBuilder {
+ // Note: by default we filter out allPlans and oldPlan in the shell's
+ // explain() function. If you add any recursive structures, make sure to
+ // edit the JS to make sure everything gets filtered.
+ public:
+ ExplainBuilder() : _i() {}
+ void ensureStartScan() {
+ if ( !_a.get() ) {
+ _a.reset( new BSONArrayBuilder() );
+ }
+ }
+ void noteCursor( Cursor *c ) {
+ BSONObjBuilder b( _a->subobjStart() );
+ b << "cursor" << c->toString() << "indexBounds" << c->prettyIndexBounds();
+ b.done();
+ }
+ void noteScan( Cursor *c, long long nscanned, long long nscannedObjects, int n, bool scanAndOrder,
+ int millis, bool hint, int nYields , int nChunkSkips , bool indexOnly ) {
+ if ( _i == 1 ) {
+ _c.reset( new BSONArrayBuilder() );
+ *_c << _b->obj();
+ }
+ if ( _i == 0 ) {
+ _b.reset( new BSONObjBuilder() );
+ }
+ else {
+ _b.reset( new BSONObjBuilder( _c->subobjStart() ) );
+ }
+ *_b << "cursor" << c->toString();
+ _b->appendNumber( "nscanned", nscanned );
+ _b->appendNumber( "nscannedObjects", nscannedObjects );
+ *_b << "n" << n;
+
+ if ( scanAndOrder )
+ *_b << "scanAndOrder" << true;
+
+ *_b << "millis" << millis;
+
+ *_b << "nYields" << nYields;
+ *_b << "nChunkSkips" << nChunkSkips;
+ *_b << "isMultiKey" << c->isMultiKey();
+ *_b << "indexOnly" << indexOnly;
+
+ *_b << "indexBounds" << c->prettyIndexBounds();
+
+ c->explainDetails( *_b );
+
+ if ( !hint ) {
+ *_b << "allPlans" << _a->arr();
+ }
+ if ( _i != 0 ) {
+ _b->done();
+ }
+ _a.reset( 0 );
+ ++_i;
+ }
+ BSONObj finishWithSuffix( long long nscanned, long long nscannedObjects, int n, int millis, const BSONObj &suffix ) {
+ if ( _i > 1 ) {
+ BSONObjBuilder b;
+ b << "clauses" << _c->arr();
+ b.appendNumber( "nscanned", nscanned );
+ b.appendNumber( "nscannedObjects", nscannedObjects );
+ b << "n" << n;
+ b << "millis" << millis;
+ b.appendElements( suffix );
+ return b.obj();
+ }
+ else {
+ stringstream host;
+ host << getHostNameCached() << ":" << cmdLine.port;
+ *_b << "server" << host.str();
+ _b->appendElements( suffix );
+ return _b->obj();
+ }
+ }
+ private:
+ auto_ptr< BSONArrayBuilder > _a;
+ auto_ptr< BSONObjBuilder > _b;
+ auto_ptr< BSONArrayBuilder > _c;
+ int _i;
+ };
+
+ // Implements database 'query' requests using the query optimizer's QueryOp interface
+ class UserQueryOp : public QueryOp {
+ public:
+
+ UserQueryOp( const ParsedQuery& pq, Message &response, ExplainBuilder &eb, CurOp &curop ) :
+ _buf( 32768 ) , // TODO be smarter here
+ _pq( pq ) ,
+ _ntoskip( pq.getSkip() ) ,
+ _nscanned(0), _oldNscanned(0), _nscannedObjects(0), _oldNscannedObjects(0),
+ _n(0),
+ _oldN(0),
+ _nYields(),
+ _nChunkSkips(),
+ _chunkManager( shardingState.needShardChunkManager(pq.ns()) ?
+ shardingState.getShardChunkManager(pq.ns()) : ShardChunkManagerPtr() ),
+ _inMemSort(false),
+ _capped(false),
+ _saveClientCursor(false),
+ _wouldSaveClientCursor(false),
+ _oplogReplay( pq.hasOption( QueryOption_OplogReplay) ),
+ _response( response ),
+ _eb( eb ),
+ _curop( curop ),
+ _yieldRecoveryFailed()
+ {}
+
+ virtual void _init() {
+ // only need to put the QueryResult fields there if we're building the first buffer in the message.
+ if ( _response.empty() ) {
+ _buf.skip( sizeof( QueryResult ) );
+ }
+
+ if ( _oplogReplay ) {
+ _findingStartCursor.reset( new FindingStartCursor( qp() ) );
+ _capped = true;
+ }
+ else {
+ _c = qp().newCursor( DiskLoc() , _pq.getNumToReturn() + _pq.getSkip() );
+ _capped = _c->capped();
+
+ // setup check for if we can only use index to extract
+ if ( _c->modifiedKeys() == false && _c->isMultiKey() == false && _pq.getFields() ) {
+ _keyFieldsOnly.reset( _pq.getFields()->checkKey( _c->indexKeyPattern() ) );
+ }
+ }
+
+ if ( qp().scanAndOrderRequired() ) {
+ _inMemSort = true;
+ _so.reset( new ScanAndOrder( _pq.getSkip() , _pq.getNumToReturn() , _pq.getOrder(), qp().multikeyFrs() ) );
+ }
+
+ if ( _pq.isExplain() ) {
+ _eb.noteCursor( _c.get() );
+ }
+
+ }
+
+ virtual bool prepareToYield() {
+ if ( _findingStartCursor.get() ) {
+ return _findingStartCursor->prepareToYield();
+ }
+ else {
+ if ( _c && !_cc ) {
+ _cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , _c , _pq.ns() ) );
+ }
+ if ( _cc ) {
+ return _cc->prepareToYield( _yieldData );
+ }
+ }
+ // no active cursor - ok to yield
+ return true;
+ }
+
+ virtual void recoverFromYield() {
+ _nYields++;
+
+ if ( _findingStartCursor.get() ) {
+ _findingStartCursor->recoverFromYield();
+ }
+ else if ( _cc && !ClientCursor::recoverFromYield( _yieldData ) ) {
+ _yieldRecoveryFailed = true;
+ _c.reset();
+ _cc.reset();
+ _so.reset();
+
+ if ( _capped ) {
+ msgassertedNoTrace( 13338, str::stream() << "capped cursor overrun during query: " << _pq.ns() );
+ }
+ else if ( qp().mustAssertOnYieldFailure() ) {
+ msgassertedNoTrace( 15890, str::stream() << "UserQueryOp::recoverFromYield() failed to recover: " << _pq.ns() );
+ }
+ else {
+ // we don't fail query since we're fine with returning partial data if collection dropped
+
+ // todo: this is wrong. the cursor could be gone if closeAllDatabases command just ran
+ }
+
+ }
+ }
+
+ virtual long long nscanned() {
+ if ( _findingStartCursor.get() ) {
+ return 0; // should only be one query plan, so value doesn't really matter.
+ }
+ return _c.get() ? _c->nscanned() : _nscanned;
+ }
+
+ virtual void next() {
+ if ( _findingStartCursor.get() ) {
+ if ( !_findingStartCursor->done() ) {
+ _findingStartCursor->next();
+ }
+ if ( _findingStartCursor->done() ) {
+ _c = _findingStartCursor->cursor();
+ _findingStartCursor.reset( 0 );
+ }
+ _capped = true;
+ return;
+ }
+
+ if ( !_c || !_c->ok() ) {
+ finish( false );
+ return;
+ }
+
+ bool mayCreateCursor1 = _pq.wantMore() && ! _inMemSort && _pq.getNumToReturn() != 1 && useCursors;
+
+ if( 0 ) {
+ cout << "SCANNING this: " << this << " key: " << _c->currKey() << " obj: " << _c->current() << endl;
+ }
+
+ if ( _pq.getMaxScan() && _nscanned >= _pq.getMaxScan() ) {
+ finish( true ); //?
+ return;
+ }
+
+ _nscanned = _c->nscanned();
+ if ( !matcher( _c )->matchesCurrent(_c.get() , &_details ) ) {
+ // not a match, continue onward
+ if ( _details._loadedObject )
+ _nscannedObjects++;
+ }
+ else {
+ _nscannedObjects++;
+ DiskLoc cl = _c->currLoc();
+ if ( _chunkManager && ! _chunkManager->belongsToMe( cl.obj() ) ) { // TODO: should make this covered at some point
+ _nChunkSkips++;
+ // log() << "TEMP skipping un-owned chunk: " << _c->current() << endl;
+ }
+ else if( _c->getsetdup(cl) ) {
+ // dup
+ }
+ else {
+ // got a match.
+
+ if ( _inMemSort ) {
+ // note: no cursors for non-indexed, ordered results. results must be fairly small.
+ _so->add( _pq.returnKey() ? _c->currKey() : _c->current(), _pq.showDiskLoc() ? &cl : 0 );
+ }
+ else if ( _ntoskip > 0 ) {
+ _ntoskip--;
+ }
+ else {
+ if ( _pq.isExplain() ) {
+ _n++;
+ if ( n() >= _pq.getNumToReturn() && !_pq.wantMore() ) {
+ // .limit() was used, show just that much.
+ finish( true ); //?
+ return;
+ }
+ }
+ else {
+
+ if ( _pq.returnKey() ) {
+ BSONObjBuilder bb( _buf );
+ bb.appendKeys( _c->indexKeyPattern() , _c->currKey() );
+ bb.done();
+ }
+ else if ( _keyFieldsOnly ) {
+ fillQueryResultFromObj( _buf , 0 , _keyFieldsOnly->hydrate( _c->currKey() ) );
+ }
+ else {
+ BSONObj js = _c->current();
+ assert( js.isValid() );
+
+ if ( _oplogReplay ) {
+ BSONElement e = js["ts"];
+ if ( e.type() == Date || e.type() == Timestamp )
+ _slaveReadTill = e._opTime();
+ }
+
+ fillQueryResultFromObj( _buf , _pq.getFields() , js , (_pq.showDiskLoc() ? &cl : 0));
+ }
+ _n++;
+ if ( ! _c->supportGetMore() ) {
+ if ( _pq.enough( n() ) || _buf.len() >= MaxBytesToReturnToClientAtOnce ) {
+ finish( true );
+ return;
+ }
+ }
+ else if ( _pq.enoughForFirstBatch( n() , _buf.len() ) ) {
+ /* if only 1 requested, no cursor saved for efficiency...we assume it is findOne() */
+ if ( mayCreateCursor1 ) {
+ _wouldSaveClientCursor = true;
+ if ( _c->advance() ) {
+ // more...so save a cursor
+ _saveClientCursor = true;
+ }
+ }
+ finish( true );
+ return;
+ }
+ }
+ }
+ }
+ }
+ _c->advance();
+ }
+
+ // this plan won, so set data for response broadly
+ void finish( bool stop ) {
+ massert( 13638, "client cursor dropped during explain query yield", !_pq.isExplain() || _c.get() );
+
+ if ( _pq.isExplain() ) {
+ _n = _inMemSort ? _so->size() : _n;
+ }
+ else if ( _inMemSort ) {
+ if( _so.get() )
+ _so->fill( _buf, _pq.getFields() , _n );
+ }
+
+ if ( _c.get() ) {
+ _nscanned = _c->nscanned();
+
+ if ( _pq.hasOption( QueryOption_CursorTailable ) && _pq.getNumToReturn() != 1 )
+ _c->setTailable();
+
+ // If the tailing request succeeded.
+ if ( _c->tailable() )
+ _saveClientCursor = true;
+ }
+
+ if ( _pq.isExplain() ) {
+ _eb.noteScan( _c.get(), _nscanned, _nscannedObjects, _n, scanAndOrderRequired(),
+ _curop.elapsedMillis(), useHints && !_pq.getHint().eoo(), _nYields ,
+ _nChunkSkips, _keyFieldsOnly.get() > 0 );
+ }
+ else {
+ if ( _buf.len() ) {
+ _response.appendData( _buf.buf(), _buf.len() );
+ _buf.decouple();
+ }
+ }
+
+ if ( stop ) {
+ setStop();
+ }
+ else {
+ setComplete();
+ }
+
+ }
+
+ void finishExplain( const BSONObj &suffix ) {
+ BSONObj obj = _eb.finishWithSuffix( totalNscanned(), nscannedObjects(), n(), _curop.elapsedMillis(), suffix);
+ fillQueryResultFromObj(_buf, 0, obj);
+ _n = 1;
+ _oldN = 0;
+ _response.appendData( _buf.buf(), _buf.len() );
+ _buf.decouple();
+ }
+
+ virtual bool mayRecordPlan() const {
+ return !_yieldRecoveryFailed && ( _pq.getNumToReturn() != 1 ) && ( ( _n > _pq.getNumToReturn() / 2 ) || ( complete() && !stopRequested() ) );
+ }
+
+ virtual QueryOp *_createChild() const {
+ if ( _pq.isExplain() ) {
+ _eb.ensureStartScan();
+ }
+ UserQueryOp *ret = new UserQueryOp( _pq, _response, _eb, _curop );
+ ret->_oldN = n();
+ ret->_oldNscanned = totalNscanned();
+ ret->_oldNscannedObjects = nscannedObjects();
+ ret->_ntoskip = _ntoskip;
+ return ret;
+ }
+
+ bool scanAndOrderRequired() const { return _inMemSort; }
+ shared_ptr<Cursor> cursor() { return _c; }
+ int n() const { return _oldN + _n; }
+ long long totalNscanned() const { return _nscanned + _oldNscanned; }
+ long long nscannedObjects() const { return _nscannedObjects + _oldNscannedObjects; }
+ bool saveClientCursor() const { return _saveClientCursor; }
+ bool wouldSaveClientCursor() const { return _wouldSaveClientCursor; }
+
+ void finishForOplogReplay( ClientCursor * cc ) {
+ if ( _oplogReplay && ! _slaveReadTill.isNull() )
+ cc->slaveReadTill( _slaveReadTill );
+
+ }
+
+ ShardChunkManagerPtr getChunkManager(){ return _chunkManager; }
+
+ private:
+ BufBuilder _buf;
+ const ParsedQuery& _pq;
+ scoped_ptr<Projection::KeyOnly> _keyFieldsOnly;
+
+ long long _ntoskip;
+ long long _nscanned;
+ long long _oldNscanned;
+ long long _nscannedObjects;
+ long long _oldNscannedObjects;
+ int _n; // found so far
+ int _oldN;
+
+ int _nYields;
+ int _nChunkSkips;
+
+ MatchDetails _details;
+
+ ShardChunkManagerPtr _chunkManager;
+
+ bool _inMemSort;
+ auto_ptr< ScanAndOrder > _so;
+
+ shared_ptr<Cursor> _c;
+ ClientCursor::CleanupPointer _cc;
+ ClientCursor::YieldData _yieldData;
+
+ bool _capped;
+ bool _saveClientCursor;
+ bool _wouldSaveClientCursor;
+ bool _oplogReplay;
+ auto_ptr< FindingStartCursor > _findingStartCursor;
+
+ Message &_response;
+ ExplainBuilder &_eb;
+ CurOp &_curop;
+ OpTime _slaveReadTill;
+
+ bool _yieldRecoveryFailed;
+ };
+
+ /* run a query -- includes checking for and running a Command \
+ @return points to ns if exhaust mode. 0=normal mode
+ */
+ const char *runQuery(Message& m, QueryMessage& q, CurOp& curop, Message &result) {
+ shared_ptr<ParsedQuery> pq_shared( new ParsedQuery(q) );
+ ParsedQuery& pq( *pq_shared );
+ int ntoskip = q.ntoskip;
+ BSONObj jsobj = q.query;
+ int queryOptions = q.queryOptions;
+ const char *ns = q.ns;
+
+ if( logLevel >= 2 )
+ log() << "runQuery called " << ns << " " << jsobj << endl;
+
+ curop.debug().ns = ns;
+ curop.debug().ntoreturn = pq.getNumToReturn();
+ curop.setQuery(jsobj);
+
+ if ( pq.couldBeCommand() ) {
+ BufBuilder bb;
+ bb.skip(sizeof(QueryResult));
+ BSONObjBuilder cmdResBuf;
+ if ( runCommands(ns, jsobj, curop, bb, cmdResBuf, false, queryOptions) ) {
+ curop.debug().iscommand = true;
+ curop.debug().query = jsobj;
+ curop.markCommand();
+
+ auto_ptr< QueryResult > qr;
+ qr.reset( (QueryResult *) bb.buf() );
+ bb.decouple();
+ qr->setResultFlagsToOk();
+ qr->len = bb.len();
+ curop.debug().responseLength = bb.len();
+ qr->setOperation(opReply);
+ qr->cursorId = 0;
+ qr->startingFrom = 0;
+ qr->nReturned = 1;
+ result.setData( qr.release(), true );
+ }
+ else {
+ uasserted(13530, "bad or malformed command request?");
+ }
+ return 0;
+ }
+
+ /* --- regular query --- */
+
+ int n = 0;
+ BSONElement hint = useHints ? pq.getHint() : BSONElement();
+ bool explain = pq.isExplain();
+ bool snapshot = pq.isSnapshot();
+ BSONObj order = pq.getOrder();
+ BSONObj query = pq.getFilter();
+
+ /* The ElemIter will not be happy if this isn't really an object. So throw exception
+ here when that is true.
+ (Which may indicate bad data from client.)
+ */
+ if ( query.objsize() == 0 ) {
+ out() << "Bad query object?\n jsobj:";
+ out() << jsobj.toString() << "\n query:";
+ out() << query.toString() << endl;
+ uassert( 10110 , "bad query object", false);
+ }
+
+ Client::ReadContext ctx( ns , dbpath ); // read locks
+
+ replVerifyReadsOk(pq);
+
+ if ( pq.hasOption( QueryOption_CursorTailable ) ) {
+ NamespaceDetails *d = nsdetails( ns );
+ uassert( 13051, "tailable cursor requested on non capped collection", d && d->capped );
+ const BSONObj nat1 = BSON( "$natural" << 1 );
+ if ( order.isEmpty() ) {
+ order = nat1;
+ }
+ else {
+ uassert( 13052, "only {$natural:1} order allowed for tailable cursor", order == nat1 );
+ }
+ }
+
+ BSONObj snapshotHint; // put here to keep the data in scope
+ if( snapshot ) {
+ NamespaceDetails *d = nsdetails(ns);
+ if ( d ) {
+ int i = d->findIdIndex();
+ if( i < 0 ) {
+ if ( strstr( ns , ".system." ) == 0 )
+ log() << "warning: no _id index on $snapshot query, ns:" << ns << endl;
+ }
+ else {
+ /* [dm] the name of an _id index tends to vary, so we build the hint the hard way here.
+ probably need a better way to specify "use the _id index" as a hint. if someone is
+ in the query optimizer please fix this then!
+ */
+ BSONObjBuilder b;
+ b.append("$hint", d->idx(i).indexName());
+ snapshotHint = b.obj();
+ hint = snapshotHint.firstElement();
+ }
+ }
+ }
+
+ if ( ! (explain || pq.showDiskLoc()) && isSimpleIdQuery( query ) && !pq.hasOption( QueryOption_CursorTailable ) ) {
+
+ bool nsFound = false;
+ bool indexFound = false;
+
+ BSONObj resObject;
+ Client& c = cc();
+ bool found = Helpers::findById( c, ns , query , resObject , &nsFound , &indexFound );
+ if ( nsFound == false || indexFound == true ) {
+ BufBuilder bb(sizeof(QueryResult)+resObject.objsize()+32);
+ bb.skip(sizeof(QueryResult));
+
+ curop.debug().idhack = true;
+ if ( found ) {
+ n = 1;
+ fillQueryResultFromObj( bb , pq.getFields() , resObject );
+ }
+ auto_ptr< QueryResult > qr;
+ qr.reset( (QueryResult *) bb.buf() );
+ bb.decouple();
+ qr->setResultFlagsToOk();
+ qr->len = bb.len();
+
+ curop.debug().responseLength = bb.len();
+ qr->setOperation(opReply);
+ qr->cursorId = 0;
+ qr->startingFrom = 0;
+ qr->nReturned = n;
+ result.setData( qr.release(), true );
+ return NULL;
+ }
+ }
+
+ // regular, not QO bypass query
+
+ BSONObj oldPlan;
+ if ( explain && ! pq.hasIndexSpecifier() ) {
+ MultiPlanScanner mps( ns, query, order );
+ if ( mps.usingCachedPlan() )
+ oldPlan = mps.oldExplain();
+ }
+ auto_ptr< MultiPlanScanner > mps( new MultiPlanScanner( ns, query, order, &hint, !explain, pq.getMin(), pq.getMax(), false, true ) );
+ BSONObj explainSuffix;
+ if ( explain ) {
+ BSONObjBuilder bb;
+ if ( !oldPlan.isEmpty() )
+ bb.append( "oldPlan", oldPlan.firstElement().embeddedObject().firstElement().embeddedObject() );
+ explainSuffix = bb.obj();
+ }
+ ExplainBuilder eb;
+ UserQueryOp original( pq, result, eb, curop );
+ shared_ptr< UserQueryOp > o = mps->runOp( original );
+ UserQueryOp &dqo = *o;
+ if ( ! dqo.complete() )
+ throw MsgAssertionException( dqo.exception() );
+ if ( explain ) {
+ dqo.finishExplain( explainSuffix );
+ }
+ n = dqo.n();
+ long long nscanned = dqo.totalNscanned();
+ curop.debug().scanAndOrder = dqo.scanAndOrderRequired();
+
+ shared_ptr<Cursor> cursor = dqo.cursor();
+ if( logLevel >= 5 )
+ log() << " used cursor: " << cursor.get() << endl;
+ long long cursorid = 0;
+ const char * exhaust = 0;
+ if ( dqo.saveClientCursor() || ( dqo.wouldSaveClientCursor() && mps->mayRunMore() ) ) {
+ ClientCursor *cc;
+ bool moreClauses = mps->mayRunMore();
+ if ( moreClauses ) {
+ // this MultiCursor will use a dumb NoOp to advance(), so no need to specify mayYield
+ shared_ptr< Cursor > multi( new MultiCursor( mps, cursor, dqo.matcher( cursor ), dqo ) );
+ cc = new ClientCursor(queryOptions, multi, ns, jsobj.getOwned());
+ }
+ else {
+ if( ! cursor->matcher() ) cursor->setMatcher( dqo.matcher( cursor ) );
+ cc = new ClientCursor( queryOptions, cursor, ns, jsobj.getOwned() );
+ }
+
+ cc->setChunkManager( dqo.getChunkManager() );
+
+ cursorid = cc->cursorid();
+ DEV tlog(2) << "query has more, cursorid: " << cursorid << endl;
+ cc->setPos( n );
+ cc->pq = pq_shared;
+ cc->fields = pq.getFieldPtr();
+ cc->originalMessage = m;
+ cc->updateLocation();
+ if ( !cc->ok() && cc->c()->tailable() )
+ DEV tlog() << "query has no more but tailable, cursorid: " << cursorid << endl;
+ if( queryOptions & QueryOption_Exhaust ) {
+ exhaust = ns;
+ curop.debug().exhaust = true;
+ }
+ dqo.finishForOplogReplay(cc);
+ }
+
+ QueryResult *qr = (QueryResult *) result.header();
+ qr->cursorId = cursorid;
+ qr->setResultFlagsToOk();
+ // qr->len is updated automatically by appendData()
+ curop.debug().responseLength = qr->len;
+ qr->setOperation(opReply);
+ qr->startingFrom = 0;
+ qr->nReturned = n;
+
+ int duration = curop.elapsedMillis();
+ bool dbprofile = curop.shouldDBProfile( duration );
+ if ( dbprofile || duration >= cmdLine.slowMS ) {
+ curop.debug().nscanned = (int) nscanned;
+ curop.debug().ntoskip = ntoskip;
+ }
+ curop.debug().nreturned = n;
+ return exhaust;
+ }
+
+} // namespace mongo