diff options
| field | value | date |
|---|---|---|
| author | Benjamin Murphy <benjamin_murphy@me.com> | 2016-02-29 15:27:18 -0500 |
| committer | Benjamin Murphy <benjamin_murphy@me.com> | 2016-03-04 10:05:05 -0500 |
| commit | aee9f7e2a93d89ccbca459993565b182d5296dfa (patch) | |
| tree | 478963299a3dfafc238ffaa01940f652c73c81c5 | |
| parent | b465c40655a665f61f34fb225ca77492e47a868f (diff) | |
| download | mongo-aee9f7e2a93d89ccbca459993565b182d5296dfa.tar.gz | |
SERVER-22093 Aggregation uses a COUNT plan when no fields are needed from input documents.
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | jstests/aggregation/bugs/server22093.js | 35 |
| -rw-r--r-- | jstests/aggregation/testexplain.js | 560 |
| -rw-r--r-- | src/mongo/db/pipeline/dependencies.cpp | 5 |
| -rw-r--r-- | src/mongo/db/pipeline/dependencies.h | 4 |
| -rw-r--r-- | src/mongo/db/pipeline/document_source.h | 9 |
| -rw-r--r-- | src/mongo/db/pipeline/document_source_cursor.cpp | 4 |
| -rw-r--r-- | src/mongo/db/pipeline/pipeline_d.cpp | 10 |
| -rw-r--r-- | src/mongo/db/query/get_executor.cpp | 7 |
| -rw-r--r-- | src/mongo/db/query/query_planner_params.h | 6 |
9 files changed, 68 insertions, 572 deletions
diff --git a/jstests/aggregation/bugs/server22093.js b/jstests/aggregation/bugs/server22093.js new file mode 100644 index 00000000000..a3bc05de53a --- /dev/null +++ b/jstests/aggregation/bugs/server22093.js @@ -0,0 +1,35 @@ +// From the work done for SERVER-22093, an aggregation pipeline that does not require any fields +// from the input documents will tell the query planner to use a count scan, which is faster than an +// index scan. In this test file, we check this behavior through explain(). +load('jstests/libs/analyze_plan.js'); + +(function() { + "use strict"; + + var coll = db.countscan; + coll.drop(); + + for (var i = 0; i < 3; i++) { + for (var j = 0; j < 10; j += 2) { + coll.insert({foo: i, bar: j}); + } + } + + coll.ensureIndex({foo: 1}); + + var simpleGroup = coll.aggregate([{$group: {_id: null, count: {$sum: 1}}}]).toArray(); + + assert.eq(simpleGroup.length, 1); + assert.eq(simpleGroup[0]["count"], 15); + + var explained = coll.explain().aggregate([{$match: {foo: {$gt: 0}}}, + {$group: {_id: null, count: {$sum: 1}}}]); + + assert(planHasStage(explained.stages[0].$cursor.queryPlanner.winningPlan, "COUNT_SCAN")); + + explained = coll.explain().aggregate([{$match: {foo: {$gt: 0}}}, + {$project: {_id: 0, a: {$literal: null}}}, + {$group: {_id: null, count: {$sum: 1}}}]); + + assert(planHasStage(explained.stages[0].$cursor.queryPlanner.winningPlan, "COUNT_SCAN")); +}()); diff --git a/jstests/aggregation/testexplain.js b/jstests/aggregation/testexplain.js deleted file mode 100644 index 6172df350e8..00000000000 --- a/jstests/aggregation/testexplain.js +++ /dev/null @@ -1,560 +0,0 @@ -if ( 0 ) { - -/* load the test documents */ -load('jstests/aggregation/data/articles.js'); - -/* load the test utilities */ -load('jstests/aggregation/extras/utils.js'); - -function removeVariants(ed) { - // ignore the timing, since it may vary - delete ed.serverPipeline[0].cursor.millis; - - // ignore the server the test runs on - delete ed.serverPipeline[0].cursor.server; -} - 
-/* sample aggregate explain command queries */ -// make sure we're using the right db; this is the same as "use mydb;" in shell -db = db.getSisterDB("aggdb"); - -// $max, and averaging in a final projection; _id is structured -var eg2 = db.runCommand({ aggregate : "article", explain: true, - splitMongodPipeline: true, pipeline : [ - { $project : { - author : 1, - tags : 1, - pageViews : 1 - }}, - { $unwind : "$tags" }, - { $group : { - _id: { tags : 1 }, - docsByTag : { $sum : 1 }, - viewsByTag : { $sum : "$pageViews" }, - mostViewsByTag : { $max : "$pageViews" }, - }}, - { $project : { - _id: false, - tag : "$_id.tags", - mostViewsByTag : 1, - docsByTag : 1, - viewsByTag : 1, - avgByTag : { $divide:["$viewsByTag", "$docsByTag"] } - }} -]}); - -removeVariants(eg2); - -var eg2result = { - "serverPipeline" : [ - { - "query" : { - - }, - "cursor" : { - "cursor" : "BasicCursor", - "isMultiKey" : false, - "n" : 3, - "nscannedObjects" : 3, - "nscanned" : 3, - "scanAndOrder" : false, - "indexOnly" : false, - "nYields" : 0, - "nChunkSkips" : 0, - "indexBounds" : { - - }, - "allPlans" : [ - { - "cursor" : "BasicCursor", - "n" : 3, - "nscannedObjects" : 3, - "nscanned" : 3, - "indexBounds" : { - - } - } - ] - } - }, - { - "$project" : { - "author" : true, - "pageViews" : true, - "tags" : true - } - }, - { - "$unwind" : "$tags" - }, - { - "$group" : { - "_id" : { - "tags" : true - }, - "docsByTag" : { - "$sum" : 1 - }, - "viewsByTag" : { - "$sum" : "$pageViews" - }, - "mostViewsByTag" : { - "$max" : "$pageViews" - } - } - } - ], - "mongosPipeline" : [ - { - "$group" : { - "_id" : "$_id", - "docsByTag" : { - "$sum" : "$docsByTag" - }, - "viewsByTag" : { - "$sum" : "$viewsByTag" - }, - "mostViewsByTag" : { - "$max" : "$mostViewsByTag" - } - } - }, - { - "$project" : { - "_id" : false, - "docsByTag" : true, - "mostViewsByTag" : true, - "viewsByTag" : true, - "tag" : "$_id.tags", - "avgByTag" : { - "$divide" : [ - "$viewsByTag", - "$docsByTag" - ] - } - } - } - ], - "ok" : 1 -}; 
- -assert(documentEq(eg2, eg2result), 'eg2 failed'); - - -db.digits.drop(); -for(i = 0; i < 21; i += 2) db.digits.insert( { d : i } ); -for(i = 1; i < 20; i += 2) db.digits.insert( { d : i } ); - -var ed1 = db.runCommand({ aggregate : "digits", explain: true, pipeline : [ - { $match: { d : {$gte : 5, $lte : 15}}} -]}); - -removeVariants(ed1); - -var ed1result = { - "serverPipeline" : [ - { - "query" : { - "d" : { - "$gte" : 5, - "$lte" : 15 - } - }, - "cursor" : { - "cursor" : "BasicCursor", - "isMultiKey" : false, - "n" : 11, - "nscannedObjects" : 21, - "nscanned" : 21, - "scanAndOrder" : false, - "indexOnly" : false, - "nYields" : 0, - "nChunkSkips" : 0, - "indexBounds" : { - - }, - "allPlans" : [ - { - "cursor" : "BasicCursor", - "n" : 11, - "nscannedObjects" : 21, - "nscanned" : 21, - "indexBounds" : { - - } - } - ] - } - } - ], - "ok" : 1 -}; - -assert(documentEq(ed1, ed1result), 'ed1 failed'); - - -var ed2 = db.runCommand({ aggregate : "digits", explain: true, pipeline : [ - { $sort : { d : 1 } }, - { $skip : 5 }, - { $limit : 10 } -]}); - -removeVariants(ed2); - -var ed2result = { - "serverPipeline" : [ - { - "query" : { - - }, - "cursor" : { - "cursor" : "BasicCursor", - "isMultiKey" : false, - "n" : 21, - "nscannedObjects" : 21, - "nscanned" : 21, - "scanAndOrder" : false, - "indexOnly" : false, - "nYields" : 0, - "nChunkSkips" : 0, - "indexBounds" : { - - }, - "allPlans" : [ - { - "cursor" : "BasicCursor", - "n" : 21, - "nscannedObjects" : 21, - "nscanned" : 21, - "indexBounds" : { - - } - } - ] - } - }, - { - "$sort" : { - "d" : 1 - } - }, - { - "$skip" : NumberLong(5) - }, - { - "$limit" : NumberLong(10) - } - ], - "ok" : 1 -}; - -assert(documentEq(ed2, ed2result), 'ed2 failed'); - - -var ed3 = db.runCommand({ aggregate : "digits", explain: true, pipeline : [ - { $match: { d : {$gte : 10, $lte : 20}}}, - { $sort : { d : 1 } }, - { $skip : 5 }, - { $limit : 10 } -]}); - -removeVariants(ed3); - -var ed3result = { - "serverPipeline" : [ - { - "query" : { - 
"d" : { - "$gte" : 10, - "$lte" : 20 - } - }, - "cursor" : { - "cursor" : "BasicCursor", - "isMultiKey" : false, - "n" : 11, - "nscannedObjects" : 21, - "nscanned" : 21, - "scanAndOrder" : false, - "indexOnly" : false, - "nYields" : 0, - "nChunkSkips" : 0, - "indexBounds" : { - - }, - "allPlans" : [ - { - "cursor" : "BasicCursor", - "n" : 11, - "nscannedObjects" : 21, - "nscanned" : 21, - "indexBounds" : { - - } - } - ] - } - }, - { - "$sort" : { - "d" : 1 - } - }, - { - "$skip" : NumberLong(5) - }, - { - "$limit" : NumberLong(10) - } - ], - "ok" : 1 -}; - -assert(documentEq(ed3, ed3result), 'ed3 failed'); - - -/**** -Repeat those last three with an index -*****/ -db.digits.ensureIndex( { d : 1 } ); - - -var edi1 = db.runCommand({ aggregate : "digits", explain: true, pipeline : [ - { $match: { d : {$gte : 5, $lte : 15}}} -]}); - -removeVariants(edi1); - -var edi1result = { - "serverPipeline" : [ - { - "query" : { - "d" : { - "$gte" : 5, - "$lte" : 15 - } - }, - "cursor" : { - "cursor" : "BtreeCursor d_1", - "isMultiKey" : false, - "n" : 11, - "nscannedObjects" : 11, - "nscanned" : 11, - "scanAndOrder" : false, - "indexOnly" : false, - "nYields" : 0, - "nChunkSkips" : 0, - "indexBounds" : { - "d" : [ - [ - 5, - 15 - ] - ] - }, - "allPlans" : [ - { - "cursor" : "BtreeCursor d_1", - "n" : 11, - "nscannedObjects" : 11, - "nscanned" : 11, - "indexBounds" : { - "d" : [ - [ - 5, - 15 - ] - ] - } - } - ], - "oldPlan" : { - "cursor" : "BtreeCursor d_1", - "indexBounds" : { - "d" : [ - [ - 5, - 15 - ] - ] - } - } - } - } - ], - "ok" : 1 -}; - -assert(documentEq(edi1, edi1result), 'edi1 failed'); - - -var edi2 = db.runCommand({ aggregate : "digits", explain: true, pipeline : [ - { $sort : { d : 1 } }, - { $skip : 5 }, - { $limit : 10 } -]}); - -removeVariants(edi2); - -var edi2result = { - "serverPipeline" : [ - { - "query" : { - - }, - "sort" : { - "d" : 1 - }, - "cursor" : { - "cursor" : "BtreeCursor d_1", - "isMultiKey" : false, - "n" : 21, - "nscannedObjects" : 21, - 
"nscanned" : 21, - "scanAndOrder" : false, - "indexOnly" : false, - "nYields" : 0, - "nChunkSkips" : 0, - "indexBounds" : { - "d" : [ - [ - { - "$minElement" : 1 - }, - { - "$maxElement" : 1 - } - ] - ] - }, - "allPlans" : [ - { - "cursor" : "BtreeCursor d_1", - "n" : 21, - "nscannedObjects" : 21, - "nscanned" : 21, - "indexBounds" : { - "d" : [ - [ - { - "$minElement" : 1 - }, - { - "$maxElement" : 1 - } - ] - ] - } - } - ], - "oldPlan" : { - "cursor" : "BtreeCursor d_1", - "indexBounds" : { - "d" : [ - [ - { - "$minElement" : 1 - }, - { - "$maxElement" : 1 - } - ] - ] - } - } - } - }, - { - "$skip" : NumberLong(5) - }, - { - "$limit" : NumberLong(10) - } - ], - "ok" : 1 -}; - -assert(documentEq(edi2, edi2result), 'edi2 failed'); - - -var edi3 = db.runCommand({ aggregate : "digits", explain: true, pipeline : [ - { $match: { d : {$gte : 10, $lte : 20}}}, - { $sort : { d : 1 } }, - { $skip : 5 }, - { $limit : 10 } -]}); - -removeVariants(edi3); - -var edi3result = { - "serverPipeline" : [ - { - "query" : { - "d" : { - "$gte" : 10, - "$lte" : 20 - } - }, - "sort" : { - "d" : 1 - }, - "cursor" : { - "cursor" : "BtreeCursor d_1", - "isMultiKey" : false, - "n" : 11, - "nscannedObjects" : 11, - "nscanned" : 11, - "scanAndOrder" : false, - "indexOnly" : false, - "nYields" : 0, - "nChunkSkips" : 0, - "indexBounds" : { - "d" : [ - [ - 10, - 20 - ] - ] - }, - "allPlans" : [ - { - "cursor" : "BtreeCursor d_1", - "n" : 11, - "nscannedObjects" : 11, - "nscanned" : 11, - "indexBounds" : { - "d" : [ - [ - 10, - 20 - ] - ] - } - } - ], - "oldPlan" : { - "cursor" : "BtreeCursor d_1", - "indexBounds" : { - "d" : [ - [ - 10, - 20 - ] - ] - } - } - } - }, - { - "$skip" : NumberLong(5) - }, - { - "$limit" : NumberLong(10) - } - ], - "ok" : 1 -}; - -assert(documentEq(edi3, edi3result), 'edi3 failed'); - -} diff --git a/src/mongo/db/pipeline/dependencies.cpp b/src/mongo/db/pipeline/dependencies.cpp index 30ced50e06a..40cce57e6f0 100644 --- a/src/mongo/db/pipeline/dependencies.cpp +++ 
b/src/mongo/db/pipeline/dependencies.cpp @@ -53,10 +53,7 @@ BSONObj DepsTracker::toProjection() const { return bb.obj(); if (fields.empty()) { - // Projection language lacks good a way to say no fields needed. This fakes it. - bb.append("_id", 0); - bb.append("$noFieldsNeeded", 1); - return bb.obj(); + return BSONObj(); } bool needId = false; diff --git a/src/mongo/db/pipeline/dependencies.h b/src/mongo/db/pipeline/dependencies.h index b39ff1fcfd3..64b8f454a0b 100644 --- a/src/mongo/db/pipeline/dependencies.h +++ b/src/mongo/db/pipeline/dependencies.h @@ -53,6 +53,10 @@ struct DepsTracker { std::set<std::string> fields; // names of needed fields in dotted notation bool needWholeDocument; // if true, ignore fields and assume the whole document is needed bool needTextScore; + + bool hasNoRequirements() const { + return fields.empty() && !needWholeDocument && !needTextScore; + } }; /** diff --git a/src/mongo/db/pipeline/document_source.h b/src/mongo/db/pipeline/document_source.h index 2387c554790..1fa544be059 100644 --- a/src/mongo/db/pipeline/document_source.h +++ b/src/mongo/db/pipeline/document_source.h @@ -398,6 +398,14 @@ public: /// returns -1 for no limit long long getLimit() const; + /** + * If subsequent sources need no information from the cursor, the cursor can simply output empty + * documents, avoiding the overhead of converting BSONObjs to Documents. 
+ */ + void shouldProduceEmptyDocs() { + _shouldProduceEmptyDocs = true; + } + private: DocumentSourceCursor(const std::string& ns, const std::shared_ptr<PlanExecutor>& exec, @@ -411,6 +419,7 @@ private: BSONObj _query; BSONObj _sort; BSONObj _projection; + bool _shouldProduceEmptyDocs = false; boost::optional<ParsedDeps> _dependencies; boost::intrusive_ptr<DocumentSourceLimit> _limit; long long _docsAddedToBatches; // for _limit enforcement diff --git a/src/mongo/db/pipeline/document_source_cursor.cpp b/src/mongo/db/pipeline/document_source_cursor.cpp index e851f14cf93..c91c7fb2110 100644 --- a/src/mongo/db/pipeline/document_source_cursor.cpp +++ b/src/mongo/db/pipeline/document_source_cursor.cpp @@ -94,7 +94,9 @@ void DocumentSourceCursor::loadBatch() { BSONObj obj; PlanExecutor::ExecState state; while ((state = _exec->getNext(&obj, NULL)) == PlanExecutor::ADVANCED) { - if (_dependencies) { + if (_shouldProduceEmptyDocs) { + _currentBatch.push_back(Document()); + } else if (_dependencies) { _currentBatch.push_back(_dependencies->extractFields(obj)); } else { _currentBatch.push_back(Document::fromBsonWithMetaData(obj)); diff --git a/src/mongo/db/pipeline/pipeline_d.cpp b/src/mongo/db/pipeline/pipeline_d.cpp index 98bca7c1a9e..a41cf01f0d5 100644 --- a/src/mongo/db/pipeline/pipeline_d.cpp +++ b/src/mongo/db/pipeline/pipeline_d.cpp @@ -352,6 +352,12 @@ std::shared_ptr<PlanExecutor> PipelineD::prepareExecutor( plannerOpts |= QueryPlannerParams::INCLUDE_SHARD_FILTER; } + if (deps.hasNoRequirements()) { + // If we don't need any fields from the input document, performing a count is faster, and + // will output empty documents, which is okay. + plannerOpts |= QueryPlannerParams::IS_COUNT; + } + // The only way to get a text score is to let the query system handle the projection. In all // other cases, unless the query system can do an index-covered projection and avoid going to // the raw record at all, it is faster to have ParsedDeps filter the fields we need. 
@@ -431,6 +437,10 @@ shared_ptr<PlanExecutor> PipelineD::addCursorSource(const intrusive_ptr<Pipeline pSource->setQuery(queryObj); pSource->setSort(sortObj); + if (deps.hasNoRequirements()) { + pSource->shouldProduceEmptyDocs(); + } + if (!projectionObj.isEmpty()) { pSource->setProjection(projectionObj, boost::none); } else { diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp index 4e9b8bc89d4..faea115ca02 100644 --- a/src/mongo/db/query/get_executor.cpp +++ b/src/mongo/db/query/get_executor.cpp @@ -315,8 +315,7 @@ Status prepareExecution(OperationContext* opCtx, Status status = QueryPlanner::planFromCache(*canonicalQuery, plannerParams, *cs, &qs); if (status.isOK()) { - if ((plannerParams.options & QueryPlannerParams::PRIVATE_IS_COUNT) && - turnIxscanIntoCount(qs)) { + if ((plannerParams.options & QueryPlannerParams::IS_COUNT) && turnIxscanIntoCount(qs)) { LOG(2) << "Using fast count: " << canonicalQuery->toStringShort(); } @@ -360,7 +359,7 @@ Status prepareExecution(OperationContext* opCtx, } // See if one of our solutions is a fast count hack in disguise. - if (plannerParams.options & QueryPlannerParams::PRIVATE_IS_COUNT) { + if (plannerParams.options & QueryPlannerParams::IS_COUNT) { for (size_t i = 0; i < solutions.size(); ++i) { if (turnIxscanIntoCount(solutions[i])) { // Great, we can use solutions[i]. Clean up the other QuerySolution(s). 
@@ -1200,7 +1199,7 @@ StatusWith<unique_ptr<PlanExecutor>> getExecutorCount(OperationContext* txn, txn, std::move(ws), std::move(root), request.getNs().ns(), yieldPolicy); } - const size_t plannerOptions = QueryPlannerParams::PRIVATE_IS_COUNT; + const size_t plannerOptions = QueryPlannerParams::IS_COUNT; PlanStage* child; QuerySolution* rawQuerySolution; Status prepStatus = prepareExecution( diff --git a/src/mongo/db/query/query_planner_params.h b/src/mongo/db/query/query_planner_params.h index 226c059c912..8210fb05e1a 100644 --- a/src/mongo/db/query/query_planner_params.h +++ b/src/mongo/db/query/query_planner_params.h @@ -74,9 +74,9 @@ struct QueryPlannerParams { // of the query in the query results. KEEP_MUTATIONS = 1 << 5, - // Nobody should set this above the getExecutor interface. Internal flag set as a hint - // to the planner that the caller is actually the count command. - PRIVATE_IS_COUNT = 1 << 6, + // Indicate to the planner that the caller is requesting a count operation, possibly through + // a count command, or as part of an aggregation pipeline. + IS_COUNT = 1 << 6, // Set this if you want to handle batchSize properly with sort(). If limits on SORT // stages are always actually limits, then this should be left off. If they are |