summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBenjamin Murphy <benjamin_murphy@me.com>2016-02-29 15:27:18 -0500
committerBenjamin Murphy <benjamin_murphy@me.com>2016-03-04 10:05:05 -0500
commitaee9f7e2a93d89ccbca459993565b182d5296dfa (patch)
tree478963299a3dfafc238ffaa01940f652c73c81c5
parentb465c40655a665f61f34fb225ca77492e47a868f (diff)
downloadmongo-aee9f7e2a93d89ccbca459993565b182d5296dfa.tar.gz
SERVER-22093 Aggregation uses a COUNT plan when no fields are needed from input documents.
-rw-r--r--jstests/aggregation/bugs/server22093.js35
-rw-r--r--jstests/aggregation/testexplain.js560
-rw-r--r--src/mongo/db/pipeline/dependencies.cpp5
-rw-r--r--src/mongo/db/pipeline/dependencies.h4
-rw-r--r--src/mongo/db/pipeline/document_source.h9
-rw-r--r--src/mongo/db/pipeline/document_source_cursor.cpp4
-rw-r--r--src/mongo/db/pipeline/pipeline_d.cpp10
-rw-r--r--src/mongo/db/query/get_executor.cpp7
-rw-r--r--src/mongo/db/query/query_planner_params.h6
9 files changed, 68 insertions, 572 deletions
diff --git a/jstests/aggregation/bugs/server22093.js b/jstests/aggregation/bugs/server22093.js
new file mode 100644
index 00000000000..a3bc05de53a
--- /dev/null
+++ b/jstests/aggregation/bugs/server22093.js
@@ -0,0 +1,35 @@
+// From the work done for SERVER-22093, an aggregation pipeline that does not require any fields
+// from the input documents will tell the query planner to use a count scan, which is faster than an
+// index scan. In this test file, we check this behavior through explain().
+load('jstests/libs/analyze_plan.js');
+
+(function() {
+ "use strict";
+
+ var coll = db.countscan;
+ coll.drop();
+
+ for (var i = 0; i < 3; i++) {
+ for (var j = 0; j < 10; j += 2) {
+ coll.insert({foo: i, bar: j});
+ }
+ }
+
+ coll.ensureIndex({foo: 1});
+
+ var simpleGroup = coll.aggregate([{$group: {_id: null, count: {$sum: 1}}}]).toArray();
+
+ assert.eq(simpleGroup.length, 1);
+ assert.eq(simpleGroup[0]["count"], 15);
+
+ var explained = coll.explain().aggregate([{$match: {foo: {$gt: 0}}},
+ {$group: {_id: null, count: {$sum: 1}}}]);
+
+ assert(planHasStage(explained.stages[0].$cursor.queryPlanner.winningPlan, "COUNT_SCAN"));
+
+ explained = coll.explain().aggregate([{$match: {foo: {$gt: 0}}},
+ {$project: {_id: 0, a: {$literal: null}}},
+ {$group: {_id: null, count: {$sum: 1}}}]);
+
+ assert(planHasStage(explained.stages[0].$cursor.queryPlanner.winningPlan, "COUNT_SCAN"));
+}());
diff --git a/jstests/aggregation/testexplain.js b/jstests/aggregation/testexplain.js
deleted file mode 100644
index 6172df350e8..00000000000
--- a/jstests/aggregation/testexplain.js
+++ /dev/null
@@ -1,560 +0,0 @@
-if ( 0 ) {
-
-/* load the test documents */
-load('jstests/aggregation/data/articles.js');
-
-/* load the test utilities */
-load('jstests/aggregation/extras/utils.js');
-
-function removeVariants(ed) {
- // ignore the timing, since it may vary
- delete ed.serverPipeline[0].cursor.millis;
-
- // ignore the server the test runs on
- delete ed.serverPipeline[0].cursor.server;
-}
-
-/* sample aggregate explain command queries */
-// make sure we're using the right db; this is the same as "use mydb;" in shell
-db = db.getSisterDB("aggdb");
-
-// $max, and averaging in a final projection; _id is structured
-var eg2 = db.runCommand({ aggregate : "article", explain: true,
- splitMongodPipeline: true, pipeline : [
- { $project : {
- author : 1,
- tags : 1,
- pageViews : 1
- }},
- { $unwind : "$tags" },
- { $group : {
- _id: { tags : 1 },
- docsByTag : { $sum : 1 },
- viewsByTag : { $sum : "$pageViews" },
- mostViewsByTag : { $max : "$pageViews" },
- }},
- { $project : {
- _id: false,
- tag : "$_id.tags",
- mostViewsByTag : 1,
- docsByTag : 1,
- viewsByTag : 1,
- avgByTag : { $divide:["$viewsByTag", "$docsByTag"] }
- }}
-]});
-
-removeVariants(eg2);
-
-var eg2result = {
- "serverPipeline" : [
- {
- "query" : {
-
- },
- "cursor" : {
- "cursor" : "BasicCursor",
- "isMultiKey" : false,
- "n" : 3,
- "nscannedObjects" : 3,
- "nscanned" : 3,
- "scanAndOrder" : false,
- "indexOnly" : false,
- "nYields" : 0,
- "nChunkSkips" : 0,
- "indexBounds" : {
-
- },
- "allPlans" : [
- {
- "cursor" : "BasicCursor",
- "n" : 3,
- "nscannedObjects" : 3,
- "nscanned" : 3,
- "indexBounds" : {
-
- }
- }
- ]
- }
- },
- {
- "$project" : {
- "author" : true,
- "pageViews" : true,
- "tags" : true
- }
- },
- {
- "$unwind" : "$tags"
- },
- {
- "$group" : {
- "_id" : {
- "tags" : true
- },
- "docsByTag" : {
- "$sum" : 1
- },
- "viewsByTag" : {
- "$sum" : "$pageViews"
- },
- "mostViewsByTag" : {
- "$max" : "$pageViews"
- }
- }
- }
- ],
- "mongosPipeline" : [
- {
- "$group" : {
- "_id" : "$_id",
- "docsByTag" : {
- "$sum" : "$docsByTag"
- },
- "viewsByTag" : {
- "$sum" : "$viewsByTag"
- },
- "mostViewsByTag" : {
- "$max" : "$mostViewsByTag"
- }
- }
- },
- {
- "$project" : {
- "_id" : false,
- "docsByTag" : true,
- "mostViewsByTag" : true,
- "viewsByTag" : true,
- "tag" : "$_id.tags",
- "avgByTag" : {
- "$divide" : [
- "$viewsByTag",
- "$docsByTag"
- ]
- }
- }
- }
- ],
- "ok" : 1
-};
-
-assert(documentEq(eg2, eg2result), 'eg2 failed');
-
-
-db.digits.drop();
-for(i = 0; i < 21; i += 2) db.digits.insert( { d : i } );
-for(i = 1; i < 20; i += 2) db.digits.insert( { d : i } );
-
-var ed1 = db.runCommand({ aggregate : "digits", explain: true, pipeline : [
- { $match: { d : {$gte : 5, $lte : 15}}}
-]});
-
-removeVariants(ed1);
-
-var ed1result = {
- "serverPipeline" : [
- {
- "query" : {
- "d" : {
- "$gte" : 5,
- "$lte" : 15
- }
- },
- "cursor" : {
- "cursor" : "BasicCursor",
- "isMultiKey" : false,
- "n" : 11,
- "nscannedObjects" : 21,
- "nscanned" : 21,
- "scanAndOrder" : false,
- "indexOnly" : false,
- "nYields" : 0,
- "nChunkSkips" : 0,
- "indexBounds" : {
-
- },
- "allPlans" : [
- {
- "cursor" : "BasicCursor",
- "n" : 11,
- "nscannedObjects" : 21,
- "nscanned" : 21,
- "indexBounds" : {
-
- }
- }
- ]
- }
- }
- ],
- "ok" : 1
-};
-
-assert(documentEq(ed1, ed1result), 'ed1 failed');
-
-
-var ed2 = db.runCommand({ aggregate : "digits", explain: true, pipeline : [
- { $sort : { d : 1 } },
- { $skip : 5 },
- { $limit : 10 }
-]});
-
-removeVariants(ed2);
-
-var ed2result = {
- "serverPipeline" : [
- {
- "query" : {
-
- },
- "cursor" : {
- "cursor" : "BasicCursor",
- "isMultiKey" : false,
- "n" : 21,
- "nscannedObjects" : 21,
- "nscanned" : 21,
- "scanAndOrder" : false,
- "indexOnly" : false,
- "nYields" : 0,
- "nChunkSkips" : 0,
- "indexBounds" : {
-
- },
- "allPlans" : [
- {
- "cursor" : "BasicCursor",
- "n" : 21,
- "nscannedObjects" : 21,
- "nscanned" : 21,
- "indexBounds" : {
-
- }
- }
- ]
- }
- },
- {
- "$sort" : {
- "d" : 1
- }
- },
- {
- "$skip" : NumberLong(5)
- },
- {
- "$limit" : NumberLong(10)
- }
- ],
- "ok" : 1
-};
-
-assert(documentEq(ed2, ed2result), 'ed2 failed');
-
-
-var ed3 = db.runCommand({ aggregate : "digits", explain: true, pipeline : [
- { $match: { d : {$gte : 10, $lte : 20}}},
- { $sort : { d : 1 } },
- { $skip : 5 },
- { $limit : 10 }
-]});
-
-removeVariants(ed3);
-
-var ed3result = {
- "serverPipeline" : [
- {
- "query" : {
- "d" : {
- "$gte" : 10,
- "$lte" : 20
- }
- },
- "cursor" : {
- "cursor" : "BasicCursor",
- "isMultiKey" : false,
- "n" : 11,
- "nscannedObjects" : 21,
- "nscanned" : 21,
- "scanAndOrder" : false,
- "indexOnly" : false,
- "nYields" : 0,
- "nChunkSkips" : 0,
- "indexBounds" : {
-
- },
- "allPlans" : [
- {
- "cursor" : "BasicCursor",
- "n" : 11,
- "nscannedObjects" : 21,
- "nscanned" : 21,
- "indexBounds" : {
-
- }
- }
- ]
- }
- },
- {
- "$sort" : {
- "d" : 1
- }
- },
- {
- "$skip" : NumberLong(5)
- },
- {
- "$limit" : NumberLong(10)
- }
- ],
- "ok" : 1
-};
-
-assert(documentEq(ed3, ed3result), 'ed3 failed');
-
-
-/****
-Repeat those last three with an index
-*****/
-db.digits.ensureIndex( { d : 1 } );
-
-
-var edi1 = db.runCommand({ aggregate : "digits", explain: true, pipeline : [
- { $match: { d : {$gte : 5, $lte : 15}}}
-]});
-
-removeVariants(edi1);
-
-var edi1result = {
- "serverPipeline" : [
- {
- "query" : {
- "d" : {
- "$gte" : 5,
- "$lte" : 15
- }
- },
- "cursor" : {
- "cursor" : "BtreeCursor d_1",
- "isMultiKey" : false,
- "n" : 11,
- "nscannedObjects" : 11,
- "nscanned" : 11,
- "scanAndOrder" : false,
- "indexOnly" : false,
- "nYields" : 0,
- "nChunkSkips" : 0,
- "indexBounds" : {
- "d" : [
- [
- 5,
- 15
- ]
- ]
- },
- "allPlans" : [
- {
- "cursor" : "BtreeCursor d_1",
- "n" : 11,
- "nscannedObjects" : 11,
- "nscanned" : 11,
- "indexBounds" : {
- "d" : [
- [
- 5,
- 15
- ]
- ]
- }
- }
- ],
- "oldPlan" : {
- "cursor" : "BtreeCursor d_1",
- "indexBounds" : {
- "d" : [
- [
- 5,
- 15
- ]
- ]
- }
- }
- }
- }
- ],
- "ok" : 1
-};
-
-assert(documentEq(edi1, edi1result), 'edi1 failed');
-
-
-var edi2 = db.runCommand({ aggregate : "digits", explain: true, pipeline : [
- { $sort : { d : 1 } },
- { $skip : 5 },
- { $limit : 10 }
-]});
-
-removeVariants(edi2);
-
-var edi2result = {
- "serverPipeline" : [
- {
- "query" : {
-
- },
- "sort" : {
- "d" : 1
- },
- "cursor" : {
- "cursor" : "BtreeCursor d_1",
- "isMultiKey" : false,
- "n" : 21,
- "nscannedObjects" : 21,
- "nscanned" : 21,
- "scanAndOrder" : false,
- "indexOnly" : false,
- "nYields" : 0,
- "nChunkSkips" : 0,
- "indexBounds" : {
- "d" : [
- [
- {
- "$minElement" : 1
- },
- {
- "$maxElement" : 1
- }
- ]
- ]
- },
- "allPlans" : [
- {
- "cursor" : "BtreeCursor d_1",
- "n" : 21,
- "nscannedObjects" : 21,
- "nscanned" : 21,
- "indexBounds" : {
- "d" : [
- [
- {
- "$minElement" : 1
- },
- {
- "$maxElement" : 1
- }
- ]
- ]
- }
- }
- ],
- "oldPlan" : {
- "cursor" : "BtreeCursor d_1",
- "indexBounds" : {
- "d" : [
- [
- {
- "$minElement" : 1
- },
- {
- "$maxElement" : 1
- }
- ]
- ]
- }
- }
- }
- },
- {
- "$skip" : NumberLong(5)
- },
- {
- "$limit" : NumberLong(10)
- }
- ],
- "ok" : 1
-};
-
-assert(documentEq(edi2, edi2result), 'edi2 failed');
-
-
-var edi3 = db.runCommand({ aggregate : "digits", explain: true, pipeline : [
- { $match: { d : {$gte : 10, $lte : 20}}},
- { $sort : { d : 1 } },
- { $skip : 5 },
- { $limit : 10 }
-]});
-
-removeVariants(edi3);
-
-var edi3result = {
- "serverPipeline" : [
- {
- "query" : {
- "d" : {
- "$gte" : 10,
- "$lte" : 20
- }
- },
- "sort" : {
- "d" : 1
- },
- "cursor" : {
- "cursor" : "BtreeCursor d_1",
- "isMultiKey" : false,
- "n" : 11,
- "nscannedObjects" : 11,
- "nscanned" : 11,
- "scanAndOrder" : false,
- "indexOnly" : false,
- "nYields" : 0,
- "nChunkSkips" : 0,
- "indexBounds" : {
- "d" : [
- [
- 10,
- 20
- ]
- ]
- },
- "allPlans" : [
- {
- "cursor" : "BtreeCursor d_1",
- "n" : 11,
- "nscannedObjects" : 11,
- "nscanned" : 11,
- "indexBounds" : {
- "d" : [
- [
- 10,
- 20
- ]
- ]
- }
- }
- ],
- "oldPlan" : {
- "cursor" : "BtreeCursor d_1",
- "indexBounds" : {
- "d" : [
- [
- 10,
- 20
- ]
- ]
- }
- }
- }
- },
- {
- "$skip" : NumberLong(5)
- },
- {
- "$limit" : NumberLong(10)
- }
- ],
- "ok" : 1
-};
-
-assert(documentEq(edi3, edi3result), 'edi3 failed');
-
-}
diff --git a/src/mongo/db/pipeline/dependencies.cpp b/src/mongo/db/pipeline/dependencies.cpp
index 30ced50e06a..40cce57e6f0 100644
--- a/src/mongo/db/pipeline/dependencies.cpp
+++ b/src/mongo/db/pipeline/dependencies.cpp
@@ -53,10 +53,7 @@ BSONObj DepsTracker::toProjection() const {
return bb.obj();
if (fields.empty()) {
- // Projection language lacks good a way to say no fields needed. This fakes it.
- bb.append("_id", 0);
- bb.append("$noFieldsNeeded", 1);
- return bb.obj();
+ return BSONObj();
}
bool needId = false;
diff --git a/src/mongo/db/pipeline/dependencies.h b/src/mongo/db/pipeline/dependencies.h
index b39ff1fcfd3..64b8f454a0b 100644
--- a/src/mongo/db/pipeline/dependencies.h
+++ b/src/mongo/db/pipeline/dependencies.h
@@ -53,6 +53,10 @@ struct DepsTracker {
std::set<std::string> fields; // names of needed fields in dotted notation
bool needWholeDocument; // if true, ignore fields and assume the whole document is needed
bool needTextScore;
+
+ bool hasNoRequirements() const {
+ return fields.empty() && !needWholeDocument && !needTextScore;
+ }
};
/**
diff --git a/src/mongo/db/pipeline/document_source.h b/src/mongo/db/pipeline/document_source.h
index 2387c554790..1fa544be059 100644
--- a/src/mongo/db/pipeline/document_source.h
+++ b/src/mongo/db/pipeline/document_source.h
@@ -398,6 +398,14 @@ public:
/// returns -1 for no limit
long long getLimit() const;
+ /**
+ * If subsequent sources need no information from the cursor, the cursor can simply output empty
+ * documents, avoiding the overhead of converting BSONObjs to Documents.
+ */
+ void shouldProduceEmptyDocs() {
+ _shouldProduceEmptyDocs = true;
+ }
+
private:
DocumentSourceCursor(const std::string& ns,
const std::shared_ptr<PlanExecutor>& exec,
@@ -411,6 +419,7 @@ private:
BSONObj _query;
BSONObj _sort;
BSONObj _projection;
+ bool _shouldProduceEmptyDocs = false;
boost::optional<ParsedDeps> _dependencies;
boost::intrusive_ptr<DocumentSourceLimit> _limit;
long long _docsAddedToBatches; // for _limit enforcement
diff --git a/src/mongo/db/pipeline/document_source_cursor.cpp b/src/mongo/db/pipeline/document_source_cursor.cpp
index e851f14cf93..c91c7fb2110 100644
--- a/src/mongo/db/pipeline/document_source_cursor.cpp
+++ b/src/mongo/db/pipeline/document_source_cursor.cpp
@@ -94,7 +94,9 @@ void DocumentSourceCursor::loadBatch() {
BSONObj obj;
PlanExecutor::ExecState state;
while ((state = _exec->getNext(&obj, NULL)) == PlanExecutor::ADVANCED) {
- if (_dependencies) {
+ if (_shouldProduceEmptyDocs) {
+ _currentBatch.push_back(Document());
+ } else if (_dependencies) {
_currentBatch.push_back(_dependencies->extractFields(obj));
} else {
_currentBatch.push_back(Document::fromBsonWithMetaData(obj));
diff --git a/src/mongo/db/pipeline/pipeline_d.cpp b/src/mongo/db/pipeline/pipeline_d.cpp
index 98bca7c1a9e..a41cf01f0d5 100644
--- a/src/mongo/db/pipeline/pipeline_d.cpp
+++ b/src/mongo/db/pipeline/pipeline_d.cpp
@@ -352,6 +352,12 @@ std::shared_ptr<PlanExecutor> PipelineD::prepareExecutor(
plannerOpts |= QueryPlannerParams::INCLUDE_SHARD_FILTER;
}
+ if (deps.hasNoRequirements()) {
+ // If we don't need any fields from the input document, performing a count is faster, and
+ // will output empty documents, which is okay.
+ plannerOpts |= QueryPlannerParams::IS_COUNT;
+ }
+
// The only way to get a text score is to let the query system handle the projection. In all
// other cases, unless the query system can do an index-covered projection and avoid going to
// the raw record at all, it is faster to have ParsedDeps filter the fields we need.
@@ -431,6 +437,10 @@ shared_ptr<PlanExecutor> PipelineD::addCursorSource(const intrusive_ptr<Pipeline
pSource->setQuery(queryObj);
pSource->setSort(sortObj);
+ if (deps.hasNoRequirements()) {
+ pSource->shouldProduceEmptyDocs();
+ }
+
if (!projectionObj.isEmpty()) {
pSource->setProjection(projectionObj, boost::none);
} else {
diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp
index 4e9b8bc89d4..faea115ca02 100644
--- a/src/mongo/db/query/get_executor.cpp
+++ b/src/mongo/db/query/get_executor.cpp
@@ -315,8 +315,7 @@ Status prepareExecution(OperationContext* opCtx,
Status status = QueryPlanner::planFromCache(*canonicalQuery, plannerParams, *cs, &qs);
if (status.isOK()) {
- if ((plannerParams.options & QueryPlannerParams::PRIVATE_IS_COUNT) &&
- turnIxscanIntoCount(qs)) {
+ if ((plannerParams.options & QueryPlannerParams::IS_COUNT) && turnIxscanIntoCount(qs)) {
LOG(2) << "Using fast count: " << canonicalQuery->toStringShort();
}
@@ -360,7 +359,7 @@ Status prepareExecution(OperationContext* opCtx,
}
// See if one of our solutions is a fast count hack in disguise.
- if (plannerParams.options & QueryPlannerParams::PRIVATE_IS_COUNT) {
+ if (plannerParams.options & QueryPlannerParams::IS_COUNT) {
for (size_t i = 0; i < solutions.size(); ++i) {
if (turnIxscanIntoCount(solutions[i])) {
// Great, we can use solutions[i]. Clean up the other QuerySolution(s).
@@ -1200,7 +1199,7 @@ StatusWith<unique_ptr<PlanExecutor>> getExecutorCount(OperationContext* txn,
txn, std::move(ws), std::move(root), request.getNs().ns(), yieldPolicy);
}
- const size_t plannerOptions = QueryPlannerParams::PRIVATE_IS_COUNT;
+ const size_t plannerOptions = QueryPlannerParams::IS_COUNT;
PlanStage* child;
QuerySolution* rawQuerySolution;
Status prepStatus = prepareExecution(
diff --git a/src/mongo/db/query/query_planner_params.h b/src/mongo/db/query/query_planner_params.h
index 226c059c912..8210fb05e1a 100644
--- a/src/mongo/db/query/query_planner_params.h
+++ b/src/mongo/db/query/query_planner_params.h
@@ -74,9 +74,9 @@ struct QueryPlannerParams {
// of the query in the query results.
KEEP_MUTATIONS = 1 << 5,
- // Nobody should set this above the getExecutor interface. Internal flag set as a hint
- // to the planner that the caller is actually the count command.
- PRIVATE_IS_COUNT = 1 << 6,
+ // Indicates to the planner that the caller is requesting a count operation, either via
+ // the count command or as part of an aggregation pipeline.
+ IS_COUNT = 1 << 6,
// Set this if you want to handle batchSize properly with sort(). If limits on SORT
// stages are always actually limits, then this should be left off. If they are