diff options
author | David Storch <david.storch@10gen.com> | 2015-05-12 11:20:17 -0400 |
---|---|---|
committer | David Storch <david.storch@10gen.com> | 2015-05-28 16:17:23 -0400 |
commit | ebd449e59e621e198a1b91f1c697b2a6b2ea8b2a (patch) | |
tree | 8c83d325268ba3a706a19606f6aed244308c160f | |
parent | 99a94133798eb8d3d812420246d0a97df4354df1 (diff) | |
download | mongo-ebd449e59e621e198a1b91f1c697b2a6b2ea8b2a.tar.gz |
SERVER-17575 find and getMore commands respect BSONObj max user size limit
-rw-r--r-- | buildscripts/resmokeconfig/suites/sharding_jscore_passthrough.yml | 1 | ||||
-rw-r--r-- | jstests/core/find_getmore_bsonsize.js | 67 | ||||
-rw-r--r-- | jstests/slow2/sharding_jscore_passthrough.js | 1 | ||||
-rw-r--r-- | src/mongo/db/commands/find_cmd.cpp | 9 | ||||
-rw-r--r-- | src/mongo/db/commands/getmore_cmd.cpp | 18 | ||||
-rw-r--r-- | src/mongo/db/query/plan_executor.cpp | 21 | ||||
-rw-r--r-- | src/mongo/db/query/plan_executor.h | 20 |
7 files changed, 121 insertions, 16 deletions
diff --git a/buildscripts/resmokeconfig/suites/sharding_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/sharding_jscore_passthrough.yml index 67cb95be16b..989c002edaf 100644 --- a/buildscripts/resmokeconfig/suites/sharding_jscore_passthrough.yml +++ b/buildscripts/resmokeconfig/suites/sharding_jscore_passthrough.yml @@ -47,6 +47,7 @@ selector: - jstests/core/dbcase2.js # SERVER-11735 # TODO: SERVER-17284 remove once find cmd is implemented in mongos - jstests/core/read_after_optime.js + - jstests/core/find_getmore_bsonsize.js - jstests/core/find_getmore_cmd.js executor: diff --git a/jstests/core/find_getmore_bsonsize.js b/jstests/core/find_getmore_bsonsize.js new file mode 100644 index 00000000000..67cfea70800 --- /dev/null +++ b/jstests/core/find_getmore_bsonsize.js @@ -0,0 +1,67 @@ +// Ensure that the find and getMore commands can handle documents nearing the 16 MB size limit for +// user-stored BSON documents. +(function() { + 'use strict'; + + var cmdRes; + var collName = 'find_getmore_cmd'; + var coll = db[collName]; + + coll.drop(); + + var oneKB = 1024; + var oneMB = 1024 * oneKB; + + // Build a (1 MB - 1 KB) string. + var smallStr = 'x'; + while (smallStr.length < oneMB) { + smallStr += smallStr; + } + assert.eq(smallStr.length, oneMB); + smallStr = smallStr.substring(0, oneMB - oneKB); + + // Build a (16 MB - 1 KB) string. + var bigStr = 'y'; + while (bigStr.length < (16 * oneMB)) { + bigStr += bigStr; + } + assert.eq(bigStr.length, 16 * oneMB); + bigStr = bigStr.substring(0, (16 * oneMB) - oneKB); + + // Collection has one ~1 MB doc followed by one ~16 MB doc. + assert.writeOK(coll.insert({_id: 0, padding: smallStr})); + assert.writeOK(coll.insert({_id: 1, padding: bigStr})); + + // Find command should just return the first doc, as adding the last would create an invalid + // command response document. 
+ cmdRes = db.runCommand({find: collName}); + assert.commandWorked(cmdRes); + assert.gt(cmdRes.cursor.id, NumberLong(0)); + assert.eq(cmdRes.cursor.ns, coll.getFullName()); + assert.eq(cmdRes.cursor.firstBatch.length, 1); + + // The 16 MB doc should be returned on getMore. + cmdRes = db.runCommand({getMore: cmdRes.cursor.id, collection: collName}); + assert.eq(cmdRes.cursor.id, NumberLong(0)); + assert.eq(cmdRes.cursor.ns, coll.getFullName()); + assert.eq(cmdRes.cursor.nextBatch.length, 1); + + // Setup a cursor without returning any results (batchSize of zero). + cmdRes = db.runCommand({find: collName, batchSize: 0}); + assert.commandWorked(cmdRes); + assert.gt(cmdRes.cursor.id, NumberLong(0)); + assert.eq(cmdRes.cursor.ns, coll.getFullName()); + assert.eq(cmdRes.cursor.firstBatch.length, 0); + + // First getMore should only return one doc, since both don't fit in the response. + cmdRes = db.runCommand({getMore: cmdRes.cursor.id, collection: collName}); + assert.gt(cmdRes.cursor.id, NumberLong(0)); + assert.eq(cmdRes.cursor.ns, coll.getFullName()); + assert.eq(cmdRes.cursor.nextBatch.length, 1); + + // Second getMore should return the second doc and close the cursor. 
+ cmdRes = db.runCommand({getMore: cmdRes.cursor.id, collection: collName}); + assert.eq(cmdRes.cursor.id, NumberLong(0)); + assert.eq(cmdRes.cursor.ns, coll.getFullName()); + assert.eq(cmdRes.cursor.nextBatch.length, 1); +})(); diff --git a/jstests/slow2/sharding_jscore_passthrough.js b/jstests/slow2/sharding_jscore_passthrough.js index 63762ae1e48..1f70c4a16be 100644 --- a/jstests/slow2/sharding_jscore_passthrough.js +++ b/jstests/slow2/sharding_jscore_passthrough.js @@ -89,6 +89,7 @@ var db; 'unix_socket\\d*|' + // TODO: SERVER-17284 remove once find cmd is // implemented in mongos + 'find_getmore_bsonsize|' + 'find_getmore_cmd|' + 'read_after_optime' + ')\.js$'); diff --git a/src/mongo/db/commands/find_cmd.cpp b/src/mongo/db/commands/find_cmd.cpp index 098235d14c6..f40c7f5d915 100644 --- a/src/mongo/db/commands/find_cmd.cpp +++ b/src/mongo/db/commands/find_cmd.cpp @@ -295,14 +295,19 @@ namespace mongo { PlanExecutor* exec = cursor->getExecutor(); // 5) Stream query results, adding them to a BSONArray as we go. - // - // TODO: Handle result sets larger than 16MB. BSONArrayBuilder firstBatch; BSONObj obj; PlanExecutor::ExecState state; int numResults = 0; while (!enoughForFirstBatch(pq, numResults, firstBatch.len()) && PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) { + // If adding this object will cause us to exceed the BSON size limit, then we stash + // it for later. + if (firstBatch.len() + obj.objsize() > BSONObjMaxUserSize && numResults > 0) { + exec->enqueue(obj); + break; + } + // Add result to output buffer. firstBatch.append(obj); numResults++; diff --git a/src/mongo/db/commands/getmore_cmd.cpp b/src/mongo/db/commands/getmore_cmd.cpp index 9c4fa4ff851..91dcffe3a4f 100644 --- a/src/mongo/db/commands/getmore_cmd.cpp +++ b/src/mongo/db/commands/getmore_cmd.cpp @@ -54,8 +54,8 @@ namespace mongo { /** - * A command for running getMore() against an existing cursor registered with a - * CursorManager. 
+ * A command for running getMore() against an existing cursor registered with a CursorManager. + * Used to generate the next batch of results for a ClientCursor. * * Can be used in combination with any cursor-generating command (e.g. find, aggregate, * listIndexes). @@ -101,11 +101,6 @@ namespace mongo { request.cursorid); } - /** - * Generates the next batch of results for a ClientCursor. - * - * TODO: Do we need to support some equivalent of OP_REPLY responseFlags? - */ bool run(OperationContext* txn, const std::string& dbname, BSONObj& cmdObj, @@ -339,11 +334,16 @@ namespace mongo { // If an awaitData getMore is killed during this process due to our max time expiring at // an interrupt point, we just continue as normal and return rather than reporting a // timeout to the user. - // - // TODO: Handle result sets larger than 16MB. BSONObj obj; try { while (PlanExecutor::ADVANCED == (*state = exec->getNext(&obj, NULL))) { + // If adding this object will cause us to exceed the BSON size limit, then we + // stash it for later. + if (nextBatch->len() + obj.objsize() > BSONObjMaxUserSize && *numResults > 0) { + exec->enqueue(obj); + break; + } + // Add result to output buffer. 
nextBatch->append(obj); (*numResults)++; diff --git a/src/mongo/db/query/plan_executor.cpp b/src/mongo/db/query/plan_executor.cpp index cd9d2478df0..1a4b38482b6 100644 --- a/src/mongo/db/query/plan_executor.cpp +++ b/src/mongo/db/query/plan_executor.cpp @@ -312,14 +312,21 @@ namespace mongo { PlanExecutor::ExecState PlanExecutor::getNextSnapshotted(Snapshotted<BSONObj>* objOut, RecordId* dlOut) { - if (killed()) { + if (killed()) { if (NULL != objOut) { Status status(ErrorCodes::OperationFailed, str::stream() << "Operation aborted because: " << *_killReason); *objOut = Snapshotted<BSONObj>(SnapshotId(), WorkingSetCommon::buildMemberStatusObject(status)); } - return PlanExecutor::DEAD; + return PlanExecutor::DEAD; + } + + if (!_stash.empty()) { + invariant(objOut && !dlOut); + *objOut = {SnapshotId(), _stash.front()}; + _stash.pop(); + return PlanExecutor::ADVANCED; } // When a stage requests a yield for document fetch, it gives us back a RecordFetcher* @@ -342,8 +349,8 @@ namespace mongo { if (killed()) { if (NULL != objOut) { - Status status(ErrorCodes::OperationFailed, - str::stream() << "Operation aborted because: " + Status status(ErrorCodes::OperationFailed, + str::stream() << "Operation aborted because: " << *_killReason); *objOut = Snapshotted<BSONObj>( SnapshotId(), @@ -460,7 +467,7 @@ namespace mongo { } bool PlanExecutor::isEOF() { - return killed() || _root->isEOF(); + return killed() || (_stash.empty() && _root->isEOF()); } void PlanExecutor::registerExec() { @@ -537,6 +544,10 @@ namespace mongo { } } + void PlanExecutor::enqueue(const BSONObj& obj) { + _stash.push(obj.getOwned()); + } + // // ScopedExecutorRegistration // diff --git a/src/mongo/db/query/plan_executor.h b/src/mongo/db/query/plan_executor.h index 98fd8991055..8304b5d7e34 100644 --- a/src/mongo/db/query/plan_executor.h +++ b/src/mongo/db/query/plan_executor.h @@ -30,6 +30,7 @@ #include <boost/optional.hpp> #include <boost/scoped_ptr.hpp> +#include <queue> #include "mongo/base/status.h" 
#include "mongo/db/invalidation_type.h" @@ -337,6 +338,20 @@ namespace mongo { */ void setYieldPolicy(YieldPolicy policy, bool registerExecutor = true); + /** + * Stash the BSONObj so that it gets returned from the PlanExecutor on a later call to + * getNext(). + * + * Enqueued documents are returned in FIFO order. The queued results are exhausted before + * generating further results from the underlying query plan. + * + * Subsequent calls to getNext() must request the BSONObj and *not* the RecordId. + * + * If used in combination with getNextSnapshotted(), then the SnapshotId associated with + * 'obj' will be null when 'obj' is dequeued. + */ + void enqueue(const BSONObj& obj); + private: /** * RAII approach to ensuring that plan executors are deregistered. @@ -422,6 +437,11 @@ namespace mongo { // TODO make this a non-pointer member. This requires some header shuffling so that this // file includes plan_yield_policy.h rather than the other way around. const boost::scoped_ptr<PlanYieldPolicy> _yieldPolicy; + + // A stash of results generated by this plan that the user of the PlanExecutor didn't want + // to consume yet. We empty the queue before retrieving further results from the plan + // stages. + std::queue<BSONObj> _stash; }; } // namespace mongo |