diff options
author | Hari Khalsa <hkhalsa@10gen.com> | 2014-01-27 18:01:26 -0500 |
---|---|---|
committer | Hari Khalsa <hkhalsa@10gen.com> | 2014-01-29 13:18:47 -0500 |
commit | 3b3c25e571852893373ae2e2b361397b260687c9 (patch) | |
tree | e2350ef7605c1e5a6d33649d095d19a624803ea9 /src/mongo/db/query | |
parent | 2bee3f018b8d4351f8261c405adcdff44c7f9a70 (diff) | |
download | mongo-3b3c25e571852893373ae2e2b361397b260687c9.tar.gz |
SERVER-12460 faster count for simple queries
Diffstat (limited to 'src/mongo/db/query')
-rw-r--r-- | src/mongo/db/query/get_runner.cpp | 277 | ||||
-rw-r--r-- | src/mongo/db/query/get_runner.h | 22 | ||||
-rw-r--r-- | src/mongo/db/query/interval.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/query/interval.h | 3 | ||||
-rw-r--r-- | src/mongo/db/query/plan_executor.cpp | 7 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner_params.h | 4 | ||||
-rw-r--r-- | src/mongo/db/query/query_solution.cpp | 15 | ||||
-rw-r--r-- | src/mongo/db/query/query_solution.h | 26 | ||||
-rw-r--r-- | src/mongo/db/query/stage_builder.cpp | 27 | ||||
-rw-r--r-- | src/mongo/db/query/stage_types.h | 5 |
10 files changed, 375 insertions, 16 deletions
diff --git a/src/mongo/db/query/get_runner.cpp b/src/mongo/db/query/get_runner.cpp index f230f4c95dc..36d5447b9c1 100644 --- a/src/mongo/db/query/get_runner.cpp +++ b/src/mongo/db/query/get_runner.cpp @@ -126,12 +126,19 @@ namespace mongo { return Status::OK(); } + namespace { + // The body is below in the "count hack" section but getRunner calls it. + bool turnIxscanIntoCount(QuerySolution* soln); + } // namespace + /** * For a given query, get a runner. The runner could be a SingleSolutionRunner, a * CachedQueryRunner, or a MultiPlanRunner, depending on the cache/query solver/etc. */ - Status getRunner(Collection* collection, CanonicalQuery* rawCanonicalQuery, - Runner** out, size_t plannerOptions) { + Status getRunner(Collection* collection, + CanonicalQuery* rawCanonicalQuery, + Runner** out, + size_t plannerOptions) { verify(rawCanonicalQuery); auto_ptr<CanonicalQuery> canonicalQuery(rawCanonicalQuery); @@ -239,7 +246,22 @@ namespace mongo { QuerySolution *qs, *backupQs; Status status = QueryPlanner::planFromCache(*canonicalQuery, plannerParams, *cs, &qs, &backupQs); + if (status.isOK()) { + if (plannerParams.options & QueryPlannerParams::PRIVATE_IS_COUNT) { + if (turnIxscanIntoCount(qs)) { + WorkingSet* ws; + PlanStage* root; + verify(StageBuilder::build(*qs, &root, &ws)); + *out = new SingleSolutionRunner(collection, + canonicalQuery.release(), qs, root, ws); + if (NULL != backupQs) { + delete backupQs; + } + return Status::OK(); + } + } + WorkingSet* ws; PlanStage* root; verify(StageBuilder::build(*qs, &root, &ws)); @@ -270,13 +292,8 @@ namespace mongo { " planner returned error: " + status.reason()); } - /* - for (size_t i = 0; i < solutions.size(); ++i) { - QLOG() << "solution " << i << " is " << solutions[i]->toString() << endl; - } - */ - - // We cannot figure out how to answer the query. Should this ever happen? + // We cannot figure out how to answer the query. Perhaps it requires an index + // we do not have? if (0 == solutions.size()) { return Status(ErrorCodes::BadValue, str::stream() @@ -285,6 +302,31 @@ namespace mongo { << " No query solutions"); } + // See if one of our solutions is a fast count hack in disguise. + if (plannerParams.options & QueryPlannerParams::PRIVATE_IS_COUNT) { + for (size_t i = 0; i < solutions.size(); ++i) { + if (turnIxscanIntoCount(solutions[i])) { + // Great, we can use solutions[i]. Clean up the other QuerySolution(s). + for (size_t j = 0; j < solutions.size(); ++j) { + if (j != i) { + delete solutions[j]; + } + } + + // We're not going to cache anything that's fast count. + WorkingSet* ws; + PlanStage* root; + verify(StageBuilder::build(*solutions[i], &root, &ws)); + *out = new SingleSolutionRunner(collection, + canonicalQuery.release(), + solutions[i], + root, + ws); + return Status::OK(); + } + } + } + if (1 == solutions.size()) { // Only one possible plan. Run it. Build the stages from the solution. WorkingSet* ws; @@ -315,6 +357,219 @@ namespace mongo { } // + // Count hack + // + + namespace { + + /** + * Returns 'true' if the bounds 'bounds' can be represented as an interval between two the two + * values 'startKey' and 'endKey'. Inclusivity of each bound is set through the relevant + * fooKeyInclusive parameter. + * + * Returns 'false' otherwise. + * + * XXX: unit test this. + */ + bool isSingleInterval(const IndexBounds& bounds, + BSONObj* startKey, + bool* startKeyInclusive, + BSONObj* endKey, + bool* endKeyInclusive) { + // We build our start/end keys as we go. + BSONObjBuilder startBob; + BSONObjBuilder endBob; + + // The start and end keys are inclusive unless we have a non-point interval, in which case + // we take the inclusivity from there. + *startKeyInclusive = true; + *endKeyInclusive = true; + + size_t fieldNo = 0; + + // First, we skip over point intervals. + for (; fieldNo < bounds.fields.size(); ++fieldNo) { + const OrderedIntervalList& oil = bounds.fields[fieldNo]; + // A point interval requires just one interval... + if (1 != oil.intervals.size()) { + break; + } + if (!oil.intervals[0].isPoint()) { + break; + } + // Since it's a point, start == end. + startBob.append(oil.intervals[0].start); + endBob.append(oil.intervals[0].end); + } + + if (fieldNo >= bounds.fields.size()) { + // All our intervals are points. We count for all values of one field. + *startKey = startBob.obj(); + *endKey = endBob.obj(); + return true; + } + + // After point intervals we can have exactly one non-point interval. + const OrderedIntervalList& nonPoint = bounds.fields[fieldNo]; + if (1 != nonPoint.intervals.size()) { + return false; + } + + // Add the non-point interval to our builder and set the inclusivity from it. + startBob.append(nonPoint.intervals[0].start); + *startKeyInclusive = nonPoint.intervals[0].startInclusive; + endBob.append(nonPoint.intervals[0].end); + *endKeyInclusive = nonPoint.intervals[0].endInclusive; + + ++fieldNo; + + // Get some "all values" intervals for comparison's sake. + // TODO: make static? + Interval minMax = IndexBoundsBuilder::allValues(); + Interval maxMin = minMax; + maxMin.reverse(); + + // And after the non-point interval we can have any number of "all values" intervals. + for (; fieldNo < bounds.fields.size(); ++fieldNo) { + const OrderedIntervalList& oil = bounds.fields[fieldNo]; + // "All Values" is just one point. + if (1 != oil.intervals.size()) { + break; + } + + // Must be min->max or max->min. + if (oil.intervals[0].equals(minMax)) { + // As an example for the logic below, consider the index {a:1, b:1} and a count for + // {a: {$gt: 2}}. Our start key isn't inclusive (as it's $gt: 2) and looks like + // {"":2} so far. If we move to the key greater than {"":2, "": MaxKey} we will get + // the first value of 'a' that is greater than 2. + if (!*startKeyInclusive) { + startBob.appendMaxKey(""); + } + else { + // In this case, consider the index {a:1, b:1} and a count for {a:{$gte: 2}}. + // We want to look at all values where a is 2, so our start key is {"":2, + // "":MinKey}. + startBob.appendMinKey(""); + } + + // Same deal as above. Consider the index {a:1, b:1} and a count for {a: {$lt: 2}}. + // Our end key isn't inclusive as ($lt: 2) and looks like {"":2} so far. We can't + // look at any values where a is 2 so we have to stop at {"":2, "": MinKey} as + // that's the smallest key where a is still 2. + if (!*endKeyInclusive) { + endBob.appendMinKey(""); + } + else { + endBob.appendMaxKey(""); + } + } + else if (oil.intervals[0].equals(maxMin)) { + // The reasoning here is the same as above but with the directions reversed. + if (!*startKeyInclusive) { + startBob.appendMinKey(""); + } + else { + startBob.appendMaxKey(""); + } + if (!*endKeyInclusive) { + endBob.appendMaxKey(""); + } + else { + endBob.appendMinKey(""); + } + } + else { + // No dice. + break; + } + } + + if (fieldNo >= bounds.fields.size()) { + *startKey = startBob.obj(); + *endKey = endBob.obj(); + return true; + } + else { + return false; + } + } + + /** + * Returns 'true' if the provided solution 'soln' can be rewritten to use + * a fast counting stage. Mutates the tree in 'soln->root'. + * + * Otherwise, returns 'false'. + */ + bool turnIxscanIntoCount(QuerySolution* soln) { + QuerySolutionNode* root = soln->root.get(); + + // Root should be a fetch w/o any filters. + if (STAGE_FETCH != root->getType()) { + return false; + } + + if (NULL != root->filter.get()) { + return false; + } + + // Child should be an ixscan. + if (STAGE_IXSCAN != root->children[0]->getType()) { + return false; + } + + IndexScanNode* isn = static_cast<IndexScanNode*>(root->children[0]); + + // No filters allowed and side-stepping isSimpleRange for now. TODO: do we ever see + // isSimpleRange here? because we could well use it. I just don't think we ever do see it. + if (NULL != isn->filter.get() || isn->bounds.isSimpleRange) { + return false; + } + + // Make sure the bounds are OK. + BSONObj startKey; + bool startKeyInclusive; + BSONObj endKey; + bool endKeyInclusive; + + if (!isSingleInterval(isn->bounds, &startKey, &startKeyInclusive, &endKey, &endKeyInclusive)) { + return false; + } + + // Make the count node that we replace the fetch + ixscan with. + CountNode* cn = new CountNode(); + cn->indexKeyPattern = isn->indexKeyPattern; + cn->startKey = startKey; + cn->startKeyInclusive = startKeyInclusive; + cn->endKey = endKey; + cn->endKeyInclusive = endKeyInclusive; + // Takes ownership of 'cn' and deletes the old root. + soln->root.reset(cn); + return true; + } + + } // namespace + + Status getRunnerCount(Collection* collection, + const BSONObj& query, + const BSONObj& hintObj, + Runner** out) { + verify(collection); + + CanonicalQuery* cq; + uassertStatusOK(CanonicalQuery::canonicalize(collection->ns().ns(), + query, + BSONObj(), + BSONObj(), + 0, + 0, + hintObj, + &cq)); + + return getRunner(collection, cq, out, QueryPlannerParams::PRIVATE_IS_COUNT); + } + + // // Distinct hack // @@ -378,10 +633,6 @@ namespace mongo { const BSONObj& query, const string& field, Runner** out) { - - Database* db = cc().database(); - verify(db); - // This should'a been checked by the distinct command. verify(collection); diff --git a/src/mongo/db/query/get_runner.h b/src/mongo/db/query/get_runner.h index 5ac91282c77..c5011d7639c 100644 --- a/src/mongo/db/query/get_runner.h +++ b/src/mongo/db/query/get_runner.h @@ -53,7 +53,9 @@ namespace mongo { * If the query cannot be executed, returns a Status indicating why. Deletes * rawCanonicalQuery. */ - Status getRunner(CanonicalQuery* rawCanonicalQuery, Runner** out, size_t plannerOptions = 0); + Status getRunner(CanonicalQuery* rawCanonicalQuery, + Runner** out, + size_t plannerOptions = 0); /** * Get a runner for a query. Takes ownership of rawCanonicalQuery. @@ -62,8 +64,10 @@ namespace mongo { * work to obtain the Collection has already been done by the caller. The 'collection' * argument may be NULL. */ - Status getRunner(Collection* collection, CanonicalQuery* rawCanonicalQuery, - Runner** out, size_t plannerOptions = 0); + Status getRunner(Collection* collection, + CanonicalQuery* rawCanonicalQuery, + Runner** out, + size_t plannerOptions = 0); /** * Gets a runner for a query described as an unparsed BSON object over the named and optionally @@ -79,6 +83,7 @@ namespace mongo { Status getRunner(Collection* collection, const std::string& ns, const BSONObj& unparsedQuery, Runner** outRunner, CanonicalQuery** outCanonicalQuery, size_t plannerOptions = 0); + /* * Get a runner for a query executing as part of a distinct command. * @@ -90,6 +95,17 @@ namespace mongo { const BSONObj& query, const std::string& field, Runner** out); + /* + * Get a runner for a query executing as part of a count command. + * + * Count doesn't care about actually examining its results; it just wants to walk through them. + * As such, with certain covered queries, we can skip the overhead of fetching etc. when + * executing a count. + */ + Status getRunnerCount(Collection* collection, + const BSONObj& query, + const BSONObj& hintObj, + Runner** out); /** * RAII approach to ensuring that runners are deregistered in newRunQuery. diff --git a/src/mongo/db/query/interval.cpp b/src/mongo/db/query/interval.cpp index 9e1c7972273..a587d64b312 100644 --- a/src/mongo/db/query/interval.cpp +++ b/src/mongo/db/query/interval.cpp @@ -134,6 +134,11 @@ namespace mongo { return _intervalData.nFields() == 0; } + // XXX: move the stuff in the anonymous namespace into 'this' and clean this file up in general. + bool Interval::equals(const Interval& other) const { + return exact(*this, other); + } + // TODO: shortcut number of comparisons Interval::IntervalComparison Interval::compare(const Interval& other) const { // diff --git a/src/mongo/db/query/interval.h b/src/mongo/db/query/interval.h index e74cabcee35..1b8e381bc17 100644 --- a/src/mongo/db/query/interval.h +++ b/src/mongo/db/query/interval.h @@ -98,6 +98,9 @@ namespace mongo { return (!startInclusive || !endInclusive) && 0 == start.woCompare(end, false); } + /** Returns true if 'this' is the same interval as 'other' */ + bool equals(const Interval& other) const; + /** * Swap start and end points of interval. */ diff --git a/src/mongo/db/query/plan_executor.cpp b/src/mongo/db/query/plan_executor.cpp index c3c5a4fdd42..6bb69de0bfc 100644 --- a/src/mongo/db/query/plan_executor.cpp +++ b/src/mongo/db/query/plan_executor.cpp @@ -89,6 +89,13 @@ namespace mongo { PlanStage::StageState code = _root->work(&id); if (PlanStage::ADVANCED == code) { + // Fast count. + if (WorkingSet::INVALID_ID == id) { + invariant(NULL == objOut); + invariant(NULL == dlOut); + return Runner::RUNNER_ADVANCED; + } + WorkingSetMember* member = _workingSet->get(id); bool hasRequestedData = true; diff --git a/src/mongo/db/query/query_planner_params.h b/src/mongo/db/query/query_planner_params.h index 791cd7c42c5..2ed98e552fe 100644 --- a/src/mongo/db/query/query_planner_params.h +++ b/src/mongo/db/query/query_planner_params.h @@ -68,6 +68,10 @@ namespace mongo { // Set this if you want to try to keep documents deleted or mutated during the execution // of the query in the query results. KEEP_MUTATIONS = 1 << 5, + + // Nobody should set this above the getRunner interface. Internal flag set as a hint to + // the planner that the caller is actually the count command. + PRIVATE_IS_COUNT = 1 << 6, }; // See Options enum above. diff --git a/src/mongo/db/query/query_solution.cpp b/src/mongo/db/query/query_solution.cpp index 71ef22668f4..6866d017e4c 100644 --- a/src/mongo/db/query/query_solution.cpp +++ b/src/mongo/db/query/query_solution.cpp @@ -634,4 +634,19 @@ namespace mongo { *ss << "bounds = " << bounds.toString() << '\n'; } + // + // CountNode + // + + void CountNode::appendToString(mongoutils::str::stream* ss, int indent) const { + addIndent(ss, indent); + *ss << "COUNT\n"; + addIndent(ss, indent + 1); + *ss << "keyPattern = " << indexKeyPattern << '\n'; + addIndent(ss, indent + 1); + *ss << "startKey = " << startKey << '\n'; + addIndent(ss, indent + 1); + *ss << "endKey = " << endKey << '\n'; + } + } // namespace mongo diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h index 6c3964f86b5..ca0aaa24255 100644 --- a/src/mongo/db/query/query_solution.h +++ b/src/mongo/db/query/query_solution.h @@ -618,4 +618,30 @@ namespace mongo { int fieldNo; }; + /** + * Some count queries reduce to counting how many keys are between two entries in a + * Btree. + */ + struct CountNode : public QuerySolutionNode { + CountNode() { } + virtual ~CountNode() { } + + virtual StageType getType() const { return STAGE_COUNT; } + virtual void appendToString(mongoutils::str::stream* ss, int indent) const; + + bool fetched() const { return true; } + bool hasField(const string& field) const { return true; } + bool sortedByDiskLoc() const { return false; } + const BSONObjSet& getSort() const { return sorts; } + BSONObjSet sorts; + + BSONObj indexKeyPattern; + + BSONObj startKey; + bool startKeyInclusive; + + BSONObj endKey; + bool endKeyInclusive; + }; + } // namespace mongo diff --git a/src/mongo/db/query/stage_builder.cpp b/src/mongo/db/query/stage_builder.cpp index 9d4988b4783..395eee8525e 100644 --- a/src/mongo/db/query/stage_builder.cpp +++ b/src/mongo/db/query/stage_builder.cpp @@ -33,6 +33,7 @@ #include "mongo/db/exec/and_hash.h" #include "mongo/db/exec/and_sorted.h" #include "mongo/db/exec/collection_scan.h" +#include "mongo/db/exec/count.h" #include "mongo/db/exec/distinct_scan.h" #include "mongo/db/exec/fetch.h" #include "mongo/db/exec/index_scan.h" @@ -279,6 +280,32 @@ namespace mongo { params.fieldNo = dn->fieldNo; return new DistinctScan(params, ws); } + else if (STAGE_COUNT == root->getType()) { + const CountNode* cn = static_cast<const CountNode*>(root); + + Database* db = cc().database(); + if (NULL == db) { + warning() << "Can't fast-count null ns (db null)" << qsol.ns << endl; + return NULL; + } + + Collection* collection = db ? db->getCollection(qsol.ns) : NULL; + if (NULL == collection) { + warning() << "Can't fast-count null ns (coll null)" << qsol.ns << endl; + return NULL; + } + + CountParams params; + + params.descriptor = + collection->getIndexCatalog()->findIndexByKeyPattern(cn->indexKeyPattern); + params.startKey = cn->startKey; + params.startKeyInclusive = cn->startKeyInclusive; + params.endKey = cn->endKey; + params.endKeyInclusive = cn->endKeyInclusive; + + return new Count(params, ws); + } else { mongoutils::str::stream ss; root->appendToString(&ss, 0); diff --git a/src/mongo/db/query/stage_types.h b/src/mongo/db/query/stage_types.h index ecbacb9068a..cb1ffb391d1 100644 --- a/src/mongo/db/query/stage_types.h +++ b/src/mongo/db/query/stage_types.h @@ -38,6 +38,11 @@ namespace mongo { STAGE_AND_SORTED, STAGE_COLLSCAN, + // If we're running a .count(), the query is fully covered by one ixscan, and the ixscan is + // from one key to another, we can just skip through the keys without bothering to examine + // them. + STAGE_COUNT, + // If we're running a distinct, we only care about one value for each key. The distinct // stage is an ixscan with some key-skipping behvaior that only distinct uses. STAGE_DISTINCT, |