SERVER-22093 Aggregation uses a COUNT plan when no fields are needed from input documents.

author: Benjamin Murphy <benjamin_murphy@me.com> 2016-02-29 15:27:18 -0500
committer: Benjamin Murphy <benjamin_murphy@me.com> 2016-03-04 10:05:05 -0500
commit: aee9f7e2a93d89ccbca459993565b182d5296dfa (patch)
tree: 478963299a3dfafc238ffaa01940f652c73c81c5
parent: b465c40655a665f61f34fb225ca77492e47a868f (diff)
download: mongo-aee9f7e2a93d89ccbca459993565b182d5296dfa.tar.gz
9 files changed, 68 insertions, 572 deletions
diff --git a/jstests/aggregation/bugs/server22093.js b/jstests/aggregation/bugs/server22093.js
new file mode 100644
index 00000000000..a3bc05de53a
--- /dev/null
+++ b/jstests/aggregation/bugs/server22093.js
@@ -0,0 +1,35 @@
+// From the work done for SERVER-22093, an aggregation pipeline that does not require any fields
+// from the input documents will tell the query planner to use a count scan, which is faster than an
+// index scan. In this test file, we check this behavior through explain().
+load('jstests/libs/analyze_plan.js');
+
+(function() {
+    "use strict";
+
+    var coll = db.countscan;
+    coll.drop();
+
+    for (var i = 0; i < 3; i++) {
+        for (var j = 0; j < 10; j += 2) {
+            coll.insert({foo: i, bar: j});
+        }
+    }
+
+    coll.ensureIndex({foo: 1});
+
+    var simpleGroup = coll.aggregate([{$group: {_id: null, count: {$sum: 1}}}]).toArray();
+
+    assert.eq(simpleGroup.length, 1);
+    assert.eq(simpleGroup[0]["count"], 15);
+
+    var explained = coll.explain().aggregate([{$match: {foo: {$gt: 0}}},
+            {$group: {_id: null, count: {$sum: 1}}}]);
+
+    assert(planHasStage(explained.stages[0].$cursor.queryPlanner.winningPlan, "COUNT_SCAN"));
+
+    explained = coll.explain().aggregate([{$match: {foo: {$gt: 0}}},
+            {$project: {_id: 0, a: {$literal: null}}},
+            {$group: {_id: null, count: {$sum: 1}}}]);
+
+    assert(planHasStage(explained.stages[0].$cursor.queryPlanner.winningPlan, "COUNT_SCAN"));
+}());
diff --git a/jstests/aggregation/testexplain.js b/jstests/aggregation/testexplain.js
deleted file mode 100644
index 6172df350e8..00000000000
--- a/jstests/aggregation/testexplain.js
+++ /dev/null
@@ -1,560 +0,0 @@
-if ( 0 ) {
-
-/* load the test documents */
-load('jstests/aggregation/data/articles.js');
-
-/* load the test utilities */
-load('jstests/aggregation/extras/utils.js');
-
-function removeVariants(ed) {
-    // ignore the timing, since it may vary
-    delete ed.serverPipeline[0].cursor.millis;
-
-    // ignore the server the test runs on
-    delete ed.serverPipeline[0].cursor.server;
-}
-
-/* sample aggregate explain command queries */
-// make sure we're using the right db; this is the same as "use mydb;" in shell
-db = db.getSisterDB("aggdb");
-
-// $max, and averaging in a final projection; _id is structured
-var eg2 = db.runCommand({ aggregate : "article", explain: true,
-                          splitMongodPipeline: true, pipeline : [
-    { $project : {
-        author : 1,
-        tags : 1,
-        pageViews : 1
-    }},
-    { $unwind : "$tags" },
-    { $group : {
-        _id: { tags : 1 },
-        docsByTag : { $sum : 1 },
-        viewsByTag : { $sum : "$pageViews" },
-        mostViewsByTag : { $max : "$pageViews" },
-    }},
-    { $project : {
-        _id: false,
-        tag : "$_id.tags",
-        mostViewsByTag : 1,
-        docsByTag : 1,
-        viewsByTag : 1,
-        avgByTag : { $divide:["$viewsByTag", "$docsByTag"] }
-    }}
-]});
-
-removeVariants(eg2);
-
-var eg2result = {
-        "serverPipeline" : [
-                {
-                        "query" : {
-
-                        },
-                        "cursor" : {
-                                "cursor" : "BasicCursor",
-                                "isMultiKey" : false,
-                                "n" : 3,
-                                "nscannedObjects" : 3,
-                                "nscanned" : 3,
-                                "scanAndOrder" : false,
-                                "indexOnly" : false,
-                                "nYields" : 0,
-                                "nChunkSkips" : 0,
-                                "indexBounds" : {
-
-                                },
-                                "allPlans" : [
-                                        {
-                                                "cursor" : "BasicCursor",
-                                                "n" : 3,
-                                                "nscannedObjects" : 3,
-                                                "nscanned" : 3,
-                                                "indexBounds" : {
-
-                                                }
-                                        }
-                                ]
-                        }
-                },
-                {
-                        "$project" : {
-                                "author" : true,
-                                "pageViews" : true,
-                                "tags" : true
-                        }
-                },
-                {
-                        "$unwind" : "$tags"
-                },
-                {
-                        "$group" : {
-                                "_id" : {
-                                        "tags" : true
-                                },
-                                "docsByTag" : {
-                                        "$sum" : 1
-                                },
-                                "viewsByTag" : {
-                                        "$sum" : "$pageViews"
-                                },
-                                "mostViewsByTag" : {
-                                        "$max" : "$pageViews"
-                                }
-                        }
-                }
-        ],
-        "mongosPipeline" : [
-                {
-                        "$group" : {
-                                "_id" : "$_id",
-                                "docsByTag" : {
-                                        "$sum" : "$docsByTag"
-                                },
-                                "viewsByTag" : {
-                                        "$sum" : "$viewsByTag"
-                                },
-                                "mostViewsByTag" : {
-                                        "$max" : "$mostViewsByTag"
-                                }
-                        }
-                },
-                {
-                        "$project" : {
-                                "_id" : false,
-                                "docsByTag" : true,
-                                "mostViewsByTag" : true,
-                                "viewsByTag" : true,
-                                "tag" : "$_id.tags",
-                                "avgByTag" : {
-                                        "$divide" : [
-                                                "$viewsByTag",
-                                                "$docsByTag"
-                                        ]
-                                }
-                        }
-                }
-        ],
-        "ok" : 1
-};
-
-assert(documentEq(eg2, eg2result), 'eg2 failed');
-
-
-db.digits.drop();
-for(i = 0; i < 21; i += 2) db.digits.insert( { d : i } );
-for(i = 1; i < 20; i += 2) db.digits.insert( { d : i } );
-
-var ed1 = db.runCommand({ aggregate : "digits", explain: true, pipeline : [
-    { $match: { d : {$gte : 5, $lte : 15}}}
-]});
-
-removeVariants(ed1);
-
-var ed1result = {
-        "serverPipeline" : [
-                {
-                        "query" : {
-                                "d" : {
-                                        "$gte" : 5,
-                                        "$lte" : 15
-                                }
-                        },
-                        "cursor" : {
-                                "cursor" : "BasicCursor",
-                                "isMultiKey" : false,
-                                "n" : 11,
-                                "nscannedObjects" : 21,
-                                "nscanned" : 21,
-                                "scanAndOrder" : false,
-                                "indexOnly" : false,
-                                "nYields" : 0,
-                                "nChunkSkips" : 0,
-                                "indexBounds" : {
-
-                                },
-                                "allPlans" : [
-                                        {
-                                                "cursor" : "BasicCursor",
-                                                "n" : 11,
-                                                "nscannedObjects" : 21,
-                                                "nscanned" : 21,
-                                                "indexBounds" : {
-
-                                                }
-                                        }
-                                ]
-                        }
-                }
-        ],
-        "ok" : 1
-};
-
-assert(documentEq(ed1, ed1result), 'ed1 failed');
-
-
-var ed2 = db.runCommand({ aggregate : "digits", explain: true, pipeline : [
-    { $sort : { d : 1 } },
-    { $skip : 5 },
-    { $limit : 10 }
-]});
-
-removeVariants(ed2);
-
-var ed2result = {
-        "serverPipeline" : [
-                {
-                        "query" : {
-
-                        },
-                        "cursor" : {
-                                "cursor" : "BasicCursor",
-                                "isMultiKey" : false,
-                                "n" : 21,
-                                "nscannedObjects" : 21,
-                                "nscanned" : 21,
-                                "scanAndOrder" : false,
-                                "indexOnly" : false,
-                                "nYields" : 0,
-                                "nChunkSkips" : 0,
-                                "indexBounds" : {
-
-                                },
-                                "allPlans" : [
-                                        {
-                                                "cursor" : "BasicCursor",
-                                                "n" : 21,
-                                                "nscannedObjects" : 21,
-                                                "nscanned" : 21,
-                                                "indexBounds" : {
-
-                                                }
-                                        }
-                                ]
-                        }
-                },
-                {
-                        "$sort" : {
-                                "d" : 1
-                        }
-                },
-                {
-                        "$skip" : NumberLong(5)
-                },
-                {
-                        "$limit" : NumberLong(10)
-                }
-        ],
-        "ok" : 1
-};
-
-assert(documentEq(ed2, ed2result), 'ed2 failed');
-
-
-var ed3 = db.runCommand({ aggregate : "digits", explain: true, pipeline : [
-    { $match: { d : {$gte : 10, $lte : 20}}},
-    { $sort : { d : 1 } },
-    { $skip : 5 },
-    { $limit : 10 }
-]});
-
-removeVariants(ed3);
-
-var ed3result = {
-        "serverPipeline" : [
-                {
-                        "query" : {
-                                "d" : {
-                                        "$gte" : 10,
-                                        "$lte" : 20
-                                }
-                        },
-                        "cursor" : {
-                                "cursor" : "BasicCursor",
-                                "isMultiKey" : false,
-                                "n" : 11,
-                                "nscannedObjects" : 21,
-                                "nscanned" : 21,
-                                "scanAndOrder" : false,
-                                "indexOnly" : false,
-                                "nYields" : 0,
-                                "nChunkSkips" : 0,
-                                "indexBounds" : {
-
-                                },
-                                "allPlans" : [
-                                        {
-                                                "cursor" : "BasicCursor",
-                                                "n" : 11,
-                                                "nscannedObjects" : 21,
-                                                "nscanned" : 21,
-                                                "indexBounds" : {
-
-                                                }
-                                        }
-                                ]
-                        }
-                },
-                {
-                        "$sort" : {
-                                "d" : 1
-                        }
-                },
-                {
-                        "$skip" : NumberLong(5)
-                },
-                {
-                        "$limit" : NumberLong(10)
-                }
-        ],
-        "ok" : 1
-};
-
-assert(documentEq(ed3, ed3result), 'ed3 failed');
-
-
-/****
-Repeat those last three with an index
-*****/
-db.digits.ensureIndex( { d : 1 } );
-
-
-var edi1 = db.runCommand({ aggregate : "digits", explain: true, pipeline : [
-    { $match: { d : {$gte : 5, $lte : 15}}}
-]});
-
-removeVariants(edi1);
-
-var edi1result = {
-        "serverPipeline" : [
-                {
-                        "query" : {
-                                "d" : {
-                                        "$gte" : 5,
-                                        "$lte" : 15
-                                }
-                        },
-                        "cursor" : {
-                                "cursor" : "BtreeCursor d_1",
-                                "isMultiKey" : false,
-                                "n" : 11,
-                                "nscannedObjects" : 11,
-                                "nscanned" : 11,
-                                "scanAndOrder" : false,
-                                "indexOnly" : false,
-                                "nYields" : 0,
-                                "nChunkSkips" : 0,
-                                "indexBounds" : {
-                                        "d" : [
-                                                [
-                                                        5,
-                                                        15
-                                                ]
-                                        ]
-                                },
-                                "allPlans" : [
-                                        {
-                                                "cursor" : "BtreeCursor d_1",
-                                                "n" : 11,
-                                                "nscannedObjects" : 11,
-                                                "nscanned" : 11,
-                                                "indexBounds" : {
-                                                        "d" : [
-                                                                [
-                                                                        5,
-                                                                        15
-                                                                ]
-                                                        ]
-                                                }
-                                        }
-                                ],
-                                "oldPlan" : {
-                                        "cursor" : "BtreeCursor d_1",
-                                        "indexBounds" : {
-                                                "d" : [
-                                                        [
-                                                                5,
-                                                                15
-                                                        ]
-                                                ]
-                                        }
-                                }
-                        }
-                }
-        ],
-        "ok" : 1
-};
-
-assert(documentEq(edi1, edi1result), 'edi1 failed');
-
-
-var edi2 = db.runCommand({ aggregate : "digits", explain: true, pipeline : [
-    { $sort : { d : 1 } },
-    { $skip : 5 },
-    { $limit : 10 }
-]});
-
-removeVariants(edi2);
-
-var edi2result = {
-    "serverPipeline" : [
-        {
-            "query" : {
-                
-            },
-            "sort" : {
-                "d" : 1
-            },
-            "cursor" : {
-                "cursor" : "BtreeCursor d_1",
-                "isMultiKey" : false,
-                "n" : 21,
-                "nscannedObjects" : 21,
-                "nscanned" : 21,
-                "scanAndOrder" : false,
-                "indexOnly" : false,
-                "nYields" : 0,
-                "nChunkSkips" : 0,
-                "indexBounds" : {
-                    "d" : [
-                        [
-                            {
-                                "$minElement" : 1
-                            },
-                            {
-                                "$maxElement" : 1
-                            }
-                        ]
-                    ]
-                },
-                "allPlans" : [
-                    {
-                        "cursor" : "BtreeCursor d_1",
-                        "n" : 21,
-                        "nscannedObjects" : 21,
-                        "nscanned" : 21,
-                        "indexBounds" : {
-                            "d" : [
-                                [
-                                    {
-                                        "$minElement" : 1
-                                    },
-                                    {
-                                        "$maxElement" : 1
-                                    }
-                                ]
-                            ]
-                        }
-                    }
-                ],
-                "oldPlan" : {
-                    "cursor" : "BtreeCursor d_1",
-                    "indexBounds" : {
-                        "d" : [
-                            [
-                                {
-                                    "$minElement" : 1
-                                },
-                                {
-                                    "$maxElement" : 1
-                                }
-                            ]
-                        ]
-                    }
-                }
-            }
-        },
-        {
-            "$skip" : NumberLong(5)
-        },
-        {
-            "$limit" : NumberLong(10)
-        }
-    ],
-    "ok" : 1
-};
-
-assert(documentEq(edi2, edi2result), 'edi2 failed');
-
-
-var edi3 = db.runCommand({ aggregate : "digits", explain: true, pipeline : [
-    { $match: { d : {$gte : 10, $lte : 20}}},
-    { $sort : { d : 1 } },
-    { $skip : 5 },
-    { $limit : 10 }
-]});
-
-removeVariants(edi3);
-
-var edi3result = {
-        "serverPipeline" : [
-                {
-                        "query" : {
-                                "d" : {
-                                        "$gte" : 10,
-                                        "$lte" : 20
-                                }
-                        },
-                        "sort" : {
-                                "d" : 1
-                        },
-                        "cursor" : {
-                                "cursor" : "BtreeCursor d_1",
-                                "isMultiKey" : false,
-                                "n" : 11,
-                                "nscannedObjects" : 11,
-                                "nscanned" : 11,
-                                "scanAndOrder" : false,
-                                "indexOnly" : false,
-                                "nYields" : 0,
-                                "nChunkSkips" : 0,
-                                "indexBounds" : {
-                                        "d" : [
-                                                [
-                                                        10,
-                                                        20
-                                                ]
-                                        ]
-                                },
-                                "allPlans" : [
-                                        {
-                                                "cursor" : "BtreeCursor d_1",
-                                                "n" : 11,
-                                                "nscannedObjects" : 11,
-                                                "nscanned" : 11,
-                                                "indexBounds" : {
-                                                        "d" : [
-                                                                [
-                                                                        10,
-                                                                        20
-                                                                ]
-                                                        ]
-                                                }
-                                        }
-                                ],
-                                "oldPlan" : {
-                                        "cursor" : "BtreeCursor d_1",
-                                        "indexBounds" : {
-                                                "d" : [
-                                                        [
-                                                                10,
-                                                                20
-                                                        ]
-                                                ]
-                                        }
-                                }
-                        }
-                },
-                {
-                        "$skip" : NumberLong(5)
-                },
-                {
-                        "$limit" : NumberLong(10)
-                }
-        ],
-    "ok" : 1
-};
-
-assert(documentEq(edi3, edi3result), 'edi3 failed');
-
-}
diff --git a/src/mongo/db/pipeline/dependencies.cpp b/src/mongo/db/pipeline/dependencies.cpp
index 30ced50e06a..40cce57e6f0 100644
--- a/src/mongo/db/pipeline/dependencies.cpp
+++ b/src/mongo/db/pipeline/dependencies.cpp
@@ -53,10 +53,7 @@ BSONObj DepsTracker::toProjection() const {
         return bb.obj();
 
     if (fields.empty()) {
-        // Projection language lacks good a way to say no fields needed. This fakes it.
-        bb.append("_id", 0);
-        bb.append("$noFieldsNeeded", 1);
-        return bb.obj();
+        return BSONObj();
     }
 
     bool needId = false;
diff --git a/src/mongo/db/pipeline/dependencies.h b/src/mongo/db/pipeline/dependencies.h
index b39ff1fcfd3..64b8f454a0b 100644
--- a/src/mongo/db/pipeline/dependencies.h
+++ b/src/mongo/db/pipeline/dependencies.h
@@ -53,6 +53,10 @@ struct DepsTracker {
     std::set<std::string> fields;  // names of needed fields in dotted notation
     bool needWholeDocument;        // if true, ignore fields and assume the whole document is needed
     bool needTextScore;
+
+    bool hasNoRequirements() const {
+        return fields.empty() && !needWholeDocument && !needTextScore;
+    }
 };
 
 /**
diff --git a/src/mongo/db/pipeline/document_source.h b/src/mongo/db/pipeline/document_source.h
index 2387c554790..1fa544be059 100644
--- a/src/mongo/db/pipeline/document_source.h
+++ b/src/mongo/db/pipeline/document_source.h
@@ -398,6 +398,14 @@ public:
     /// returns -1 for no limit
     long long getLimit() const;
 
+    /**
+     * Tells the cursor to output empty documents. Call this when subsequent sources need no
+     * information from the cursor; it avoids the overhead of converting BSONObjs to Documents.
+     */
+    void shouldProduceEmptyDocs() {
+        _shouldProduceEmptyDocs = true;
+    }
+
 private:
     DocumentSourceCursor(const std::string& ns,
                          const std::shared_ptr<PlanExecutor>& exec,
@@ -411,6 +419,7 @@ private:
     BSONObj _query;
     BSONObj _sort;
     BSONObj _projection;
+    bool _shouldProduceEmptyDocs = false;
     boost::optional<ParsedDeps> _dependencies;
     boost::intrusive_ptr<DocumentSourceLimit> _limit;
     long long _docsAddedToBatches;  // for _limit enforcement
diff --git a/src/mongo/db/pipeline/document_source_cursor.cpp b/src/mongo/db/pipeline/document_source_cursor.cpp
index e851f14cf93..c91c7fb2110 100644
--- a/src/mongo/db/pipeline/document_source_cursor.cpp
+++ b/src/mongo/db/pipeline/document_source_cursor.cpp
@@ -94,7 +94,9 @@ void DocumentSourceCursor::loadBatch() {
     BSONObj obj;
     PlanExecutor::ExecState state;
     while ((state = _exec->getNext(&obj, NULL)) == PlanExecutor::ADVANCED) {
-        if (_dependencies) {
+        if (_shouldProduceEmptyDocs) {
+            _currentBatch.push_back(Document());
+        } else if (_dependencies) {
             _currentBatch.push_back(_dependencies->extractFields(obj));
         } else {
             _currentBatch.push_back(Document::fromBsonWithMetaData(obj));
diff --git a/src/mongo/db/pipeline/pipeline_d.cpp b/src/mongo/db/pipeline/pipeline_d.cpp
index 98bca7c1a9e..a41cf01f0d5 100644
--- a/src/mongo/db/pipeline/pipeline_d.cpp
+++ b/src/mongo/db/pipeline/pipeline_d.cpp
@@ -352,6 +352,12 @@ std::shared_ptr<PlanExecutor> PipelineD::prepareExecutor(
         plannerOpts |= QueryPlannerParams::INCLUDE_SHARD_FILTER;
     }
 
+    if (deps.hasNoRequirements()) {
+        // If we don't need any fields from the input document, performing a count is faster, and
+        // will output empty documents, which is okay.
+        plannerOpts |= QueryPlannerParams::IS_COUNT;
+    }
+
     // The only way to get a text score is to let the query system handle the projection. In all
     // other cases, unless the query system can do an index-covered projection and avoid going to
     // the raw record at all, it is faster to have ParsedDeps filter the fields we need.
@@ -431,6 +437,10 @@ shared_ptr<PlanExecutor> PipelineD::addCursorSource(const intrusive_ptr<Pipeline
     pSource->setQuery(queryObj);
     pSource->setSort(sortObj);
 
+    if (deps.hasNoRequirements()) {
+        pSource->shouldProduceEmptyDocs();
+    }
+
     if (!projectionObj.isEmpty()) {
         pSource->setProjection(projectionObj, boost::none);
     } else {
diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp
index 4e9b8bc89d4..faea115ca02 100644
--- a/src/mongo/db/query/get_executor.cpp
+++ b/src/mongo/db/query/get_executor.cpp
@@ -315,8 +315,7 @@ Status prepareExecution(OperationContext* opCtx,
         Status status = QueryPlanner::planFromCache(*canonicalQuery, plannerParams, *cs, &qs);
 
         if (status.isOK()) {
-            if ((plannerParams.options & QueryPlannerParams::PRIVATE_IS_COUNT) &&
-                turnIxscanIntoCount(qs)) {
+            if ((plannerParams.options & QueryPlannerParams::IS_COUNT) && turnIxscanIntoCount(qs)) {
                 LOG(2) << "Using fast count: " << canonicalQuery->toStringShort();
             }
 
@@ -360,7 +359,7 @@ Status prepareExecution(OperationContext* opCtx,
     }
 
     // See if one of our solutions is a fast count hack in disguise.
-    if (plannerParams.options & QueryPlannerParams::PRIVATE_IS_COUNT) {
+    if (plannerParams.options & QueryPlannerParams::IS_COUNT) {
         for (size_t i = 0; i < solutions.size(); ++i) {
             if (turnIxscanIntoCount(solutions[i])) {
                 // Great, we can use solutions[i].  Clean up the other QuerySolution(s).
@@ -1200,7 +1199,7 @@ StatusWith<unique_ptr<PlanExecutor>> getExecutorCount(OperationContext* txn,
             txn, std::move(ws), std::move(root), request.getNs().ns(), yieldPolicy);
     }
 
-    const size_t plannerOptions = QueryPlannerParams::PRIVATE_IS_COUNT;
+    const size_t plannerOptions = QueryPlannerParams::IS_COUNT;
     PlanStage* child;
     QuerySolution* rawQuerySolution;
     Status prepStatus = prepareExecution(
diff --git a/src/mongo/db/query/query_planner_params.h b/src/mongo/db/query/query_planner_params.h
index 226c059c912..8210fb05e1a 100644
--- a/src/mongo/db/query/query_planner_params.h
+++ b/src/mongo/db/query/query_planner_params.h
@@ -74,9 +74,9 @@ struct QueryPlannerParams {
         // of the query in the query results.
         KEEP_MUTATIONS = 1 << 5,
 
-        // Nobody should set this above the getExecutor interface.  Internal flag set as a hint
-        // to the planner that the caller is actually the count command.
-        PRIVATE_IS_COUNT = 1 << 6,
+        // Indicate to the planner that the caller is requesting a count operation, possibly through
+        // a count command, or as part of an aggregation pipeline.
+        IS_COUNT = 1 << 6,
 
         // Set this if you want to handle batchSize properly with sort(). If limits on SORT
         // stages are always actually limits, then this should be left off. If they are
author	Benjamin Murphy <benjamin_murphy@me.com>	2016-02-29 15:27:18 -0500
committer	Benjamin Murphy <benjamin_murphy@me.com>	2016-03-04 10:05:05 -0500
commit	aee9f7e2a93d89ccbca459993565b182d5296dfa (patch)
tree	478963299a3dfafc238ffaa01940f652c73c81c5
parent	b465c40655a665f61f34fb225ca77492e47a868f (diff)
download	mongo-aee9f7e2a93d89ccbca459993565b182d5296dfa.tar.gz