SERVER-12460 faster count for simple queries

author: Hari Khalsa <hkhalsa@10gen.com> 2014-01-27 18:01:26 -0500
committer: Hari Khalsa <hkhalsa@10gen.com> 2014-01-29 13:18:47 -0500
commit: 3b3c25e571852893373ae2e2b361397b260687c9 (patch)
tree: e2350ef7605c1e5a6d33649d095d19a624803ea9 /src/mongo/db/query
parent: 2bee3f018b8d4351f8261c405adcdff44c7f9a70 (diff)
download: mongo-3b3c25e571852893373ae2e2b361397b260687c9.tar.gz
10 files changed, 375 insertions, 16 deletions
diff --git a/src/mongo/db/query/get_runner.cpp b/src/mongo/db/query/get_runner.cpp
index f230f4c95dc..36d5447b9c1 100644
--- a/src/mongo/db/query/get_runner.cpp
+++ b/src/mongo/db/query/get_runner.cpp
@@ -126,12 +126,19 @@ namespace mongo {
         return Status::OK();
     }
 
+    namespace {
+        // The body is below in the "count hack" section but getRunner calls it.
+        bool turnIxscanIntoCount(QuerySolution* soln);
+    }  // namespace
+
     /**
      * For a given query, get a runner.  The runner could be a SingleSolutionRunner, a
      * CachedQueryRunner, or a MultiPlanRunner, depending on the cache/query solver/etc.
      */
-    Status getRunner(Collection* collection, CanonicalQuery* rawCanonicalQuery,
-                     Runner** out, size_t plannerOptions) {
+    Status getRunner(Collection* collection,
+                     CanonicalQuery* rawCanonicalQuery,
+                     Runner** out,
+                     size_t plannerOptions) {
 
         verify(rawCanonicalQuery);
         auto_ptr<CanonicalQuery> canonicalQuery(rawCanonicalQuery);
@@ -239,7 +246,22 @@ namespace mongo {
             QuerySolution *qs, *backupQs;
             Status status = QueryPlanner::planFromCache(*canonicalQuery, plannerParams, *cs,
                                                         &qs, &backupQs);
+
             if (status.isOK()) {
+                if (plannerParams.options & QueryPlannerParams::PRIVATE_IS_COUNT) {
+                    if (turnIxscanIntoCount(qs)) {
+                        WorkingSet* ws;
+                        PlanStage* root;
+                        verify(StageBuilder::build(*qs, &root, &ws));
+                        *out = new SingleSolutionRunner(collection,
+                                                        canonicalQuery.release(), qs, root, ws);
+                        if (NULL != backupQs) {
+                            delete backupQs;
+                        }
+                        return Status::OK();
+                    }
+                }
+
                 WorkingSet* ws;
                 PlanStage* root;
                 verify(StageBuilder::build(*qs, &root, &ws));
@@ -270,13 +292,8 @@ namespace mongo {
                           " planner returned error: " + status.reason());
         }
 
-        /*
-        for (size_t i = 0; i < solutions.size(); ++i) {
-            QLOG() << "solution " << i << " is " << solutions[i]->toString() << endl;
-        }
-        */
-
-        // We cannot figure out how to answer the query.  Should this ever happen?
+        // We cannot figure out how to answer the query.  Perhaps it requires an index
+        // we do not have?
         if (0 == solutions.size()) {
             return Status(ErrorCodes::BadValue,
                           str::stream()
@@ -285,6 +302,31 @@ namespace mongo {
                           << " No query solutions");
         }
 
+        // See if one of our solutions is a fast count hack in disguise.
+        if (plannerParams.options & QueryPlannerParams::PRIVATE_IS_COUNT) {
+            for (size_t i = 0; i < solutions.size(); ++i) {
+                if (turnIxscanIntoCount(solutions[i])) {
+                    // Great, we can use solutions[i].  Clean up the other QuerySolution(s).
+                    for (size_t j = 0; j < solutions.size(); ++j) {
+                        if (j != i) {
+                            delete solutions[j];
+                        }
+                    }
+
+                    // We're not going to cache anything that's fast count.
+                    WorkingSet* ws;
+                    PlanStage* root;
+                    verify(StageBuilder::build(*solutions[i], &root, &ws));
+                    *out = new SingleSolutionRunner(collection,
+                                                    canonicalQuery.release(),
+                                                    solutions[i],
+                                                    root,
+                                                    ws);
+                    return Status::OK();
+                }
+            }
+        }
+
         if (1 == solutions.size()) {
             // Only one possible plan.  Run it.  Build the stages from the solution.
             WorkingSet* ws;
@@ -315,6 +357,219 @@ namespace mongo {
     }
 
     //
+    // Count hack
+    //
+
+    namespace {
+
+        /**
+         * Returns 'true' if the bounds 'bounds' can be represented as an interval between two the two
+         * values 'startKey' and 'endKey'.  Inclusivity of each bound is set through the relevant
+         * fooKeyInclusive parameter.
+         *
+         * Returns 'false' otherwise.
+         *
+         * XXX: unit test this.
+         */
+        bool isSingleInterval(const IndexBounds& bounds,
+                              BSONObj* startKey,
+                              bool* startKeyInclusive,
+                              BSONObj* endKey,
+                              bool* endKeyInclusive) {
+            // We build our start/end keys as we go.
+            BSONObjBuilder startBob;
+            BSONObjBuilder endBob;
+
+            // The start and end keys are inclusive unless we have a non-point interval, in which case
+            // we take the inclusivity from there.
+            *startKeyInclusive = true;
+            *endKeyInclusive = true;
+
+            size_t fieldNo = 0;
+
+            // First, we skip over point intervals.
+            for (; fieldNo < bounds.fields.size(); ++fieldNo) {
+                const OrderedIntervalList& oil = bounds.fields[fieldNo];
+                // A point interval requires just one interval...
+                if (1 != oil.intervals.size()) {
+                    break;
+                }
+                if (!oil.intervals[0].isPoint()) {
+                    break;
+                }
+                // Since it's a point, start == end.
+                startBob.append(oil.intervals[0].start);
+                endBob.append(oil.intervals[0].end);
+            }
+
+            if (fieldNo >= bounds.fields.size()) {
+                // All our intervals are points.  We count for all values of one field.
+                *startKey = startBob.obj();
+                *endKey = endBob.obj();
+                return true;
+            }
+
+            // After point intervals we can have exactly one non-point interval.
+            const OrderedIntervalList& nonPoint = bounds.fields[fieldNo];
+            if (1 != nonPoint.intervals.size()) {
+                return false;
+            }
+
+            // Add the non-point interval to our builder and set the inclusivity from it.
+            startBob.append(nonPoint.intervals[0].start);
+            *startKeyInclusive = nonPoint.intervals[0].startInclusive;
+            endBob.append(nonPoint.intervals[0].end);
+            *endKeyInclusive = nonPoint.intervals[0].endInclusive;
+
+            ++fieldNo;
+
+            // Get some "all values" intervals for comparison's sake.
+            // TODO: make static?
+            Interval minMax = IndexBoundsBuilder::allValues();
+            Interval maxMin = minMax;
+            maxMin.reverse();
+
+            // And after the non-point interval we can have any number of "all values" intervals.
+            for (; fieldNo < bounds.fields.size(); ++fieldNo) {
+                const OrderedIntervalList& oil = bounds.fields[fieldNo];
+                // "All Values" is just one point.
+                if (1 != oil.intervals.size()) {
+                    break;
+                }
+
+                // Must be min->max or max->min.
+                if (oil.intervals[0].equals(minMax)) {
+                    // As an example for the logic below, consider the index {a:1, b:1} and a count for
+                    // {a: {$gt: 2}}.  Our start key isn't inclusive (as it's $gt: 2) and looks like
+                    // {"":2} so far.  If we move to the key greater than {"":2, "": MaxKey} we will get
+                    // the first value of 'a' that is greater than 2.
+                    if (!*startKeyInclusive) {
+                        startBob.appendMaxKey("");
+                    }
+                    else {
+                        // In this case, consider the index {a:1, b:1} and a count for {a:{$gte: 2}}.
+                        // We want to look at all values where a is 2, so our start key is {"":2,
+                        // "":MinKey}.
+                        startBob.appendMinKey("");
+                    }
+
+                    // Same deal as above.  Consider the index {a:1, b:1} and a count for {a: {$lt: 2}}.
+                    // Our end key isn't inclusive as ($lt: 2) and looks like {"":2} so far.  We can't
+                    // look at any values where a is 2 so we have to stop at {"":2, "": MinKey} as
+                    // that's the smallest key where a is still 2.
+                    if (!*endKeyInclusive) {
+                        endBob.appendMinKey("");
+                    }
+                    else {
+                        endBob.appendMaxKey("");
+                    }
+                }
+                else if (oil.intervals[0].equals(maxMin)) {
+                    // The reasoning here is the same as above but with the directions reversed.
+                    if (!*startKeyInclusive) {
+                        startBob.appendMinKey("");
+                    }
+                    else {
+                        startBob.appendMaxKey("");
+                    }
+                    if (!*endKeyInclusive) {
+                        endBob.appendMaxKey("");
+                    }
+                    else {
+                        endBob.appendMinKey("");
+                    }
+                }
+                else {
+                    // No dice.
+                    break;
+                }
+            }
+
+            if (fieldNo >= bounds.fields.size()) {
+                *startKey = startBob.obj();
+                *endKey = endBob.obj();
+                return true;
+            }
+            else {
+                return false;
+            }
+        }
+
+        /**
+         * Returns 'true' if the provided solution 'soln' can be rewritten to use
+         * a fast counting stage.  Mutates the tree in 'soln->root'.
+         *
+         * Otherwise, returns 'false'.
+         */
+        bool turnIxscanIntoCount(QuerySolution* soln) {
+            QuerySolutionNode* root = soln->root.get();
+
+            // Root should be a fetch w/o any filters.
+            if (STAGE_FETCH != root->getType()) {
+                return false;
+            }
+
+            if (NULL != root->filter.get()) {
+                return false;
+            }
+
+            // Child should be an ixscan.
+            if (STAGE_IXSCAN != root->children[0]->getType()) {
+                return false;
+            }
+
+            IndexScanNode* isn = static_cast<IndexScanNode*>(root->children[0]);
+
+            // No filters allowed and side-stepping isSimpleRange for now.  TODO: do we ever see
+            // isSimpleRange here?  because we could well use it.  I just don't think we ever do see it.
+            if (NULL != isn->filter.get() || isn->bounds.isSimpleRange) {
+                return false;
+            }
+
+            // Make sure the bounds are OK.
+            BSONObj startKey;
+            bool startKeyInclusive;
+            BSONObj endKey;
+            bool endKeyInclusive;
+
+            if (!isSingleInterval(isn->bounds, &startKey, &startKeyInclusive, &endKey, &endKeyInclusive)) {
+                return false;
+            }
+
+            // Make the count node that we replace the fetch + ixscan with.
+            CountNode* cn = new CountNode();
+            cn->indexKeyPattern = isn->indexKeyPattern;
+            cn->startKey = startKey;
+            cn->startKeyInclusive = startKeyInclusive;
+            cn->endKey = endKey;
+            cn->endKeyInclusive = endKeyInclusive;
+            // Takes ownership of 'cn' and deletes the old root.
+            soln->root.reset(cn);
+            return true;
+        }
+
+    }  // namespace
+
+    Status getRunnerCount(Collection* collection,
+                          const BSONObj& query,
+                          const BSONObj& hintObj,
+                          Runner** out) {
+        verify(collection);
+
+        CanonicalQuery* cq;
+        uassertStatusOK(CanonicalQuery::canonicalize(collection->ns().ns(),
+                                                     query,
+                                                     BSONObj(),
+                                                     BSONObj(), 
+                                                     0,
+                                                     0,
+                                                     hintObj,
+                                                     &cq)); 
+
+        return getRunner(collection, cq, out, QueryPlannerParams::PRIVATE_IS_COUNT);
+    }
+
+    //
     // Distinct hack
     //
 
@@ -378,10 +633,6 @@ namespace mongo {
                              const BSONObj& query,
                              const string& field,
                              Runner** out) {
-
-        Database* db = cc().database();
-        verify(db);
-
         // This should'a been checked by the distinct command.
         verify(collection);
 
diff --git a/src/mongo/db/query/get_runner.h b/src/mongo/db/query/get_runner.h
index 5ac91282c77..c5011d7639c 100644
--- a/src/mongo/db/query/get_runner.h
+++ b/src/mongo/db/query/get_runner.h
@@ -53,7 +53,9 @@ namespace mongo {
      * If the query cannot be executed, returns a Status indicating why.  Deletes
      * rawCanonicalQuery.
      */
-    Status getRunner(CanonicalQuery* rawCanonicalQuery, Runner** out, size_t plannerOptions = 0);
+    Status getRunner(CanonicalQuery* rawCanonicalQuery,
+                     Runner** out,
+                     size_t plannerOptions = 0);
 
     /**
      * Get a runner for a query.  Takes ownership of rawCanonicalQuery.
@@ -62,8 +64,10 @@ namespace mongo {
      * work to obtain the Collection has already been done by the caller. The 'collection'
      * argument may be NULL.
      */
-    Status getRunner(Collection* collection, CanonicalQuery* rawCanonicalQuery,
-                     Runner** out, size_t plannerOptions = 0);
+    Status getRunner(Collection* collection,
+                     CanonicalQuery* rawCanonicalQuery,
+                     Runner** out,
+                     size_t plannerOptions = 0);
 
     /**
      * Gets a runner for a query described as an unparsed BSON object over the named and optionally
@@ -79,6 +83,7 @@ namespace mongo {
     Status getRunner(Collection* collection, const std::string& ns, const BSONObj& unparsedQuery,
                      Runner** outRunner, CanonicalQuery** outCanonicalQuery,
                      size_t plannerOptions = 0);
+
     /*
      * Get a runner for a query executing as part of a distinct command.
      *
@@ -90,6 +95,17 @@ namespace mongo {
                              const BSONObj& query,
                              const std::string& field,
                              Runner** out);
+    /*
+     * Get a runner for a query executing as part of a count command.
+     *
+     * Count doesn't care about actually examining its results; it just wants to walk through them.
+     * As such, with certain covered queries, we can skip the overhead of fetching etc. when
+     * executing a count.
+     */
+    Status getRunnerCount(Collection* collection,
+                          const BSONObj& query,
+                          const BSONObj& hintObj,
+                          Runner** out);
 
     /**
      * RAII approach to ensuring that runners are deregistered in newRunQuery.
diff --git a/src/mongo/db/query/interval.cpp b/src/mongo/db/query/interval.cpp
index 9e1c7972273..a587d64b312 100644
--- a/src/mongo/db/query/interval.cpp
+++ b/src/mongo/db/query/interval.cpp
@@ -134,6 +134,11 @@ namespace mongo {
         return _intervalData.nFields() == 0;
     }
 
+    // XXX: move the stuff in the anonymous namespace into 'this' and clean this file up in general.
+    bool Interval::equals(const Interval& other) const {
+        return exact(*this, other);
+    }
+
     // TODO: shortcut number of comparisons
     Interval::IntervalComparison Interval::compare(const Interval& other) const {
         //
diff --git a/src/mongo/db/query/interval.h b/src/mongo/db/query/interval.h
index e74cabcee35..1b8e381bc17 100644
--- a/src/mongo/db/query/interval.h
+++ b/src/mongo/db/query/interval.h
@@ -98,6 +98,9 @@ namespace mongo {
             return (!startInclusive || !endInclusive) && 0 == start.woCompare(end, false);
         }
 
+        /** Returns true if 'this' is the same interval as 'other' */
+        bool equals(const Interval& other) const;
+
         /**
          * Swap start and end points of interval.
          */
diff --git a/src/mongo/db/query/plan_executor.cpp b/src/mongo/db/query/plan_executor.cpp
index c3c5a4fdd42..6bb69de0bfc 100644
--- a/src/mongo/db/query/plan_executor.cpp
+++ b/src/mongo/db/query/plan_executor.cpp
@@ -89,6 +89,13 @@ namespace mongo {
             PlanStage::StageState code = _root->work(&id);
 
             if (PlanStage::ADVANCED == code) {
+                // Fast count.
+                if (WorkingSet::INVALID_ID == id) {
+                    invariant(NULL == objOut);
+                    invariant(NULL == dlOut);
+                    return Runner::RUNNER_ADVANCED;
+                }
+
                 WorkingSetMember* member = _workingSet->get(id);
                 bool hasRequestedData = true;
 
diff --git a/src/mongo/db/query/query_planner_params.h b/src/mongo/db/query/query_planner_params.h
index 791cd7c42c5..2ed98e552fe 100644
--- a/src/mongo/db/query/query_planner_params.h
+++ b/src/mongo/db/query/query_planner_params.h
@@ -68,6 +68,10 @@ namespace mongo {
             // Set this if you want to try to keep documents deleted or mutated during the execution
             // of the query in the query results.
             KEEP_MUTATIONS = 1 << 5,
+
+            // Nobody should set this above the getRunner interface.  Internal flag set as a hint to
+            // the planner that the caller is actually the count command.
+            PRIVATE_IS_COUNT = 1 << 6,
         };
 
         // See Options enum above.
diff --git a/src/mongo/db/query/query_solution.cpp b/src/mongo/db/query/query_solution.cpp
index 71ef22668f4..6866d017e4c 100644
--- a/src/mongo/db/query/query_solution.cpp
+++ b/src/mongo/db/query/query_solution.cpp
@@ -634,4 +634,19 @@ namespace mongo {
         *ss << "bounds = " << bounds.toString() << '\n';
     }
 
+    //
+    // CountNode
+    //
+
+    void CountNode::appendToString(mongoutils::str::stream* ss, int indent) const {
+        addIndent(ss, indent);
+        *ss << "COUNT\n";
+        addIndent(ss, indent + 1);
+        *ss << "keyPattern = " << indexKeyPattern << '\n';
+        addIndent(ss, indent + 1);
+        *ss << "startKey = " << startKey << '\n';
+        addIndent(ss, indent + 1);
+        *ss << "endKey = " << endKey << '\n';
+    }
+
 }  // namespace mongo
diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h
index 6c3964f86b5..ca0aaa24255 100644
--- a/src/mongo/db/query/query_solution.h
+++ b/src/mongo/db/query/query_solution.h
@@ -618,4 +618,30 @@ namespace mongo {
         int fieldNo;
     };
 
+    /**
+     * Some count queries reduce to counting how many keys are between two entries in a
+     * Btree.
+     */
+    struct CountNode : public QuerySolutionNode {
+        CountNode() { }
+        virtual ~CountNode() { }
+
+        virtual StageType getType() const { return STAGE_COUNT; }
+        virtual void appendToString(mongoutils::str::stream* ss, int indent) const;
+
+        bool fetched() const { return true; }
+        bool hasField(const string& field) const { return true; }
+        bool sortedByDiskLoc() const { return false; }
+        const BSONObjSet& getSort() const { return sorts; }
+        BSONObjSet sorts;
+
+        BSONObj indexKeyPattern;
+
+        BSONObj startKey;
+        bool startKeyInclusive;
+
+        BSONObj endKey;
+        bool endKeyInclusive;
+    };
+
 }  // namespace mongo
diff --git a/src/mongo/db/query/stage_builder.cpp b/src/mongo/db/query/stage_builder.cpp
index 9d4988b4783..395eee8525e 100644
--- a/src/mongo/db/query/stage_builder.cpp
+++ b/src/mongo/db/query/stage_builder.cpp
@@ -33,6 +33,7 @@
 #include "mongo/db/exec/and_hash.h"
 #include "mongo/db/exec/and_sorted.h"
 #include "mongo/db/exec/collection_scan.h"
+#include "mongo/db/exec/count.h"
 #include "mongo/db/exec/distinct_scan.h"
 #include "mongo/db/exec/fetch.h"
 #include "mongo/db/exec/index_scan.h"
@@ -279,6 +280,32 @@ namespace mongo {
             params.fieldNo = dn->fieldNo;
             return new DistinctScan(params, ws);
         }
+        else if (STAGE_COUNT == root->getType()) {
+            const CountNode* cn = static_cast<const CountNode*>(root);
+
+            Database* db = cc().database();
+            if (NULL == db) {
+                warning() << "Can't fast-count null ns (db null)" << qsol.ns << endl;
+                return NULL;
+            }
+
+            Collection* collection = db ? db->getCollection(qsol.ns) : NULL;
+            if (NULL == collection) {
+                warning() << "Can't fast-count null ns (coll null)" << qsol.ns << endl;
+                return NULL;
+            }
+
+            CountParams params;
+
+            params.descriptor =
+                collection->getIndexCatalog()->findIndexByKeyPattern(cn->indexKeyPattern);
+            params.startKey = cn->startKey;
+            params.startKeyInclusive = cn->startKeyInclusive;
+            params.endKey = cn->endKey;
+            params.endKeyInclusive = cn->endKeyInclusive;
+
+            return new Count(params, ws);
+        }
         else {
             mongoutils::str::stream ss;
             root->appendToString(&ss, 0);
diff --git a/src/mongo/db/query/stage_types.h b/src/mongo/db/query/stage_types.h
index ecbacb9068a..cb1ffb391d1 100644
--- a/src/mongo/db/query/stage_types.h
+++ b/src/mongo/db/query/stage_types.h
@@ -38,6 +38,11 @@ namespace mongo {
         STAGE_AND_SORTED,
         STAGE_COLLSCAN,
 
+        // If we're running a .count(), the query is fully covered by one ixscan, and the ixscan is
+        // from one key to another, we can just skip through the keys without bothering to examine
+        // them.
+        STAGE_COUNT,
+
         // If we're running a distinct, we only care about one value for each key.  The distinct
         // stage is an ixscan with some key-skipping behvaior that only distinct uses.
         STAGE_DISTINCT,
author	Hari Khalsa <hkhalsa@10gen.com>	2014-01-27 18:01:26 -0500
committer	Hari Khalsa <hkhalsa@10gen.com>	2014-01-29 13:18:47 -0500
commit	3b3c25e571852893373ae2e2b361397b260687c9 (patch)
tree	e2350ef7605c1e5a6d33649d095d19a624803ea9 /src/mongo/db/query
parent	2bee3f018b8d4351f8261c405adcdff44c7f9a70 (diff)
download	mongo-3b3c25e571852893373ae2e2b361397b260687c9.tar.gz