summaryrefslogtreecommitdiff
path: root/src/mongo/db/query
diff options
context:
space:
mode:
authorHari Khalsa <hkhalsa@10gen.com>2014-01-27 18:01:26 -0500
committerHari Khalsa <hkhalsa@10gen.com>2014-01-29 13:18:47 -0500
commit3b3c25e571852893373ae2e2b361397b260687c9 (patch)
treee2350ef7605c1e5a6d33649d095d19a624803ea9 /src/mongo/db/query
parent2bee3f018b8d4351f8261c405adcdff44c7f9a70 (diff)
downloadmongo-3b3c25e571852893373ae2e2b361397b260687c9.tar.gz
SERVER-12460 faster count for simple queries
Diffstat (limited to 'src/mongo/db/query')
-rw-r--r--src/mongo/db/query/get_runner.cpp277
-rw-r--r--src/mongo/db/query/get_runner.h22
-rw-r--r--src/mongo/db/query/interval.cpp5
-rw-r--r--src/mongo/db/query/interval.h3
-rw-r--r--src/mongo/db/query/plan_executor.cpp7
-rw-r--r--src/mongo/db/query/query_planner_params.h4
-rw-r--r--src/mongo/db/query/query_solution.cpp15
-rw-r--r--src/mongo/db/query/query_solution.h26
-rw-r--r--src/mongo/db/query/stage_builder.cpp27
-rw-r--r--src/mongo/db/query/stage_types.h5
10 files changed, 375 insertions, 16 deletions
diff --git a/src/mongo/db/query/get_runner.cpp b/src/mongo/db/query/get_runner.cpp
index f230f4c95dc..36d5447b9c1 100644
--- a/src/mongo/db/query/get_runner.cpp
+++ b/src/mongo/db/query/get_runner.cpp
@@ -126,12 +126,19 @@ namespace mongo {
return Status::OK();
}
+ namespace {
+ // The body is below in the "count hack" section but getRunner calls it.
+ bool turnIxscanIntoCount(QuerySolution* soln);
+ } // namespace
+
/**
* For a given query, get a runner. The runner could be a SingleSolutionRunner, a
* CachedQueryRunner, or a MultiPlanRunner, depending on the cache/query solver/etc.
*/
- Status getRunner(Collection* collection, CanonicalQuery* rawCanonicalQuery,
- Runner** out, size_t plannerOptions) {
+ Status getRunner(Collection* collection,
+ CanonicalQuery* rawCanonicalQuery,
+ Runner** out,
+ size_t plannerOptions) {
verify(rawCanonicalQuery);
auto_ptr<CanonicalQuery> canonicalQuery(rawCanonicalQuery);
@@ -239,7 +246,22 @@ namespace mongo {
QuerySolution *qs, *backupQs;
Status status = QueryPlanner::planFromCache(*canonicalQuery, plannerParams, *cs,
&qs, &backupQs);
+
if (status.isOK()) {
+ if (plannerParams.options & QueryPlannerParams::PRIVATE_IS_COUNT) {
+ if (turnIxscanIntoCount(qs)) {
+ WorkingSet* ws;
+ PlanStage* root;
+ verify(StageBuilder::build(*qs, &root, &ws));
+ *out = new SingleSolutionRunner(collection,
+ canonicalQuery.release(), qs, root, ws);
+ if (NULL != backupQs) {
+ delete backupQs;
+ }
+ return Status::OK();
+ }
+ }
+
WorkingSet* ws;
PlanStage* root;
verify(StageBuilder::build(*qs, &root, &ws));
@@ -270,13 +292,8 @@ namespace mongo {
" planner returned error: " + status.reason());
}
- /*
- for (size_t i = 0; i < solutions.size(); ++i) {
- QLOG() << "solution " << i << " is " << solutions[i]->toString() << endl;
- }
- */
-
- // We cannot figure out how to answer the query. Should this ever happen?
+ // We cannot figure out how to answer the query. Perhaps it requires an index
+ // we do not have?
if (0 == solutions.size()) {
return Status(ErrorCodes::BadValue,
str::stream()
@@ -285,6 +302,31 @@ namespace mongo {
<< " No query solutions");
}
+ // See if one of our solutions is a fast count hack in disguise.
+ if (plannerParams.options & QueryPlannerParams::PRIVATE_IS_COUNT) {
+ for (size_t i = 0; i < solutions.size(); ++i) {
+ if (turnIxscanIntoCount(solutions[i])) {
+ // Great, we can use solutions[i]. Clean up the other QuerySolution(s).
+ for (size_t j = 0; j < solutions.size(); ++j) {
+ if (j != i) {
+ delete solutions[j];
+ }
+ }
+
+ // We're not going to cache anything that's fast count.
+ WorkingSet* ws;
+ PlanStage* root;
+ verify(StageBuilder::build(*solutions[i], &root, &ws));
+ *out = new SingleSolutionRunner(collection,
+ canonicalQuery.release(),
+ solutions[i],
+ root,
+ ws);
+ return Status::OK();
+ }
+ }
+ }
+
if (1 == solutions.size()) {
// Only one possible plan. Run it. Build the stages from the solution.
WorkingSet* ws;
@@ -315,6 +357,219 @@ namespace mongo {
}
//
+ // Count hack
+ //
+
+ namespace {
+
+ /**
+ * Returns 'true' if the bounds 'bounds' can be represented as an interval between two the two
+ * values 'startKey' and 'endKey'. Inclusivity of each bound is set through the relevant
+ * fooKeyInclusive parameter.
+ *
+ * Returns 'false' otherwise.
+ *
+ * XXX: unit test this.
+ */
+ bool isSingleInterval(const IndexBounds& bounds,
+ BSONObj* startKey,
+ bool* startKeyInclusive,
+ BSONObj* endKey,
+ bool* endKeyInclusive) {
+ // We build our start/end keys as we go.
+ BSONObjBuilder startBob;
+ BSONObjBuilder endBob;
+
+ // The start and end keys are inclusive unless we have a non-point interval, in which case
+ // we take the inclusivity from there.
+ *startKeyInclusive = true;
+ *endKeyInclusive = true;
+
+ size_t fieldNo = 0;
+
+ // First, we skip over point intervals.
+ for (; fieldNo < bounds.fields.size(); ++fieldNo) {
+ const OrderedIntervalList& oil = bounds.fields[fieldNo];
+ // A point interval requires just one interval...
+ if (1 != oil.intervals.size()) {
+ break;
+ }
+ if (!oil.intervals[0].isPoint()) {
+ break;
+ }
+ // Since it's a point, start == end.
+ startBob.append(oil.intervals[0].start);
+ endBob.append(oil.intervals[0].end);
+ }
+
+ if (fieldNo >= bounds.fields.size()) {
+ // All our intervals are points. We count for all values of one field.
+ *startKey = startBob.obj();
+ *endKey = endBob.obj();
+ return true;
+ }
+
+ // After point intervals we can have exactly one non-point interval.
+ const OrderedIntervalList& nonPoint = bounds.fields[fieldNo];
+ if (1 != nonPoint.intervals.size()) {
+ return false;
+ }
+
+ // Add the non-point interval to our builder and set the inclusivity from it.
+ startBob.append(nonPoint.intervals[0].start);
+ *startKeyInclusive = nonPoint.intervals[0].startInclusive;
+ endBob.append(nonPoint.intervals[0].end);
+ *endKeyInclusive = nonPoint.intervals[0].endInclusive;
+
+ ++fieldNo;
+
+ // Get some "all values" intervals for comparison's sake.
+ // TODO: make static?
+ Interval minMax = IndexBoundsBuilder::allValues();
+ Interval maxMin = minMax;
+ maxMin.reverse();
+
+ // And after the non-point interval we can have any number of "all values" intervals.
+ for (; fieldNo < bounds.fields.size(); ++fieldNo) {
+ const OrderedIntervalList& oil = bounds.fields[fieldNo];
+ // "All Values" is just one point.
+ if (1 != oil.intervals.size()) {
+ break;
+ }
+
+ // Must be min->max or max->min.
+ if (oil.intervals[0].equals(minMax)) {
+ // As an example for the logic below, consider the index {a:1, b:1} and a count for
+ // {a: {$gt: 2}}. Our start key isn't inclusive (as it's $gt: 2) and looks like
+ // {"":2} so far. If we move to the key greater than {"":2, "": MaxKey} we will get
+ // the first value of 'a' that is greater than 2.
+ if (!*startKeyInclusive) {
+ startBob.appendMaxKey("");
+ }
+ else {
+ // In this case, consider the index {a:1, b:1} and a count for {a:{$gte: 2}}.
+ // We want to look at all values where a is 2, so our start key is {"":2,
+ // "":MinKey}.
+ startBob.appendMinKey("");
+ }
+
+ // Same deal as above. Consider the index {a:1, b:1} and a count for {a: {$lt: 2}}.
+ // Our end key isn't inclusive as ($lt: 2) and looks like {"":2} so far. We can't
+ // look at any values where a is 2 so we have to stop at {"":2, "": MinKey} as
+ // that's the smallest key where a is still 2.
+ if (!*endKeyInclusive) {
+ endBob.appendMinKey("");
+ }
+ else {
+ endBob.appendMaxKey("");
+ }
+ }
+ else if (oil.intervals[0].equals(maxMin)) {
+ // The reasoning here is the same as above but with the directions reversed.
+ if (!*startKeyInclusive) {
+ startBob.appendMinKey("");
+ }
+ else {
+ startBob.appendMaxKey("");
+ }
+ if (!*endKeyInclusive) {
+ endBob.appendMaxKey("");
+ }
+ else {
+ endBob.appendMinKey("");
+ }
+ }
+ else {
+ // No dice.
+ break;
+ }
+ }
+
+ if (fieldNo >= bounds.fields.size()) {
+ *startKey = startBob.obj();
+ *endKey = endBob.obj();
+ return true;
+ }
+ else {
+ return false;
+ }
+ }
+
+ /**
+ * Returns 'true' if the provided solution 'soln' can be rewritten to use
+ * a fast counting stage. Mutates the tree in 'soln->root'.
+ *
+ * Otherwise, returns 'false'.
+ */
+ bool turnIxscanIntoCount(QuerySolution* soln) {
+ QuerySolutionNode* root = soln->root.get();
+
+ // Root should be a fetch w/o any filters.
+ if (STAGE_FETCH != root->getType()) {
+ return false;
+ }
+
+ if (NULL != root->filter.get()) {
+ return false;
+ }
+
+ // Child should be an ixscan.
+ if (STAGE_IXSCAN != root->children[0]->getType()) {
+ return false;
+ }
+
+ IndexScanNode* isn = static_cast<IndexScanNode*>(root->children[0]);
+
+ // No filters allowed and side-stepping isSimpleRange for now. TODO: do we ever see
+ // isSimpleRange here? because we could well use it. I just don't think we ever do see it.
+ if (NULL != isn->filter.get() || isn->bounds.isSimpleRange) {
+ return false;
+ }
+
+ // Make sure the bounds are OK.
+ BSONObj startKey;
+ bool startKeyInclusive;
+ BSONObj endKey;
+ bool endKeyInclusive;
+
+ if (!isSingleInterval(isn->bounds, &startKey, &startKeyInclusive, &endKey, &endKeyInclusive)) {
+ return false;
+ }
+
+ // Make the count node that we replace the fetch + ixscan with.
+ CountNode* cn = new CountNode();
+ cn->indexKeyPattern = isn->indexKeyPattern;
+ cn->startKey = startKey;
+ cn->startKeyInclusive = startKeyInclusive;
+ cn->endKey = endKey;
+ cn->endKeyInclusive = endKeyInclusive;
+ // Takes ownership of 'cn' and deletes the old root.
+ soln->root.reset(cn);
+ return true;
+ }
+
+ } // namespace
+
+ Status getRunnerCount(Collection* collection,
+ const BSONObj& query,
+ const BSONObj& hintObj,
+ Runner** out) {
+ verify(collection);
+
+ CanonicalQuery* cq;
+ uassertStatusOK(CanonicalQuery::canonicalize(collection->ns().ns(),
+ query,
+ BSONObj(),
+ BSONObj(),
+ 0,
+ 0,
+ hintObj,
+ &cq));
+
+ return getRunner(collection, cq, out, QueryPlannerParams::PRIVATE_IS_COUNT);
+ }
+
+ //
// Distinct hack
//
@@ -378,10 +633,6 @@ namespace mongo {
const BSONObj& query,
const string& field,
Runner** out) {
-
- Database* db = cc().database();
- verify(db);
-
// This should'a been checked by the distinct command.
verify(collection);
diff --git a/src/mongo/db/query/get_runner.h b/src/mongo/db/query/get_runner.h
index 5ac91282c77..c5011d7639c 100644
--- a/src/mongo/db/query/get_runner.h
+++ b/src/mongo/db/query/get_runner.h
@@ -53,7 +53,9 @@ namespace mongo {
* If the query cannot be executed, returns a Status indicating why. Deletes
* rawCanonicalQuery.
*/
- Status getRunner(CanonicalQuery* rawCanonicalQuery, Runner** out, size_t plannerOptions = 0);
+ Status getRunner(CanonicalQuery* rawCanonicalQuery,
+ Runner** out,
+ size_t plannerOptions = 0);
/**
* Get a runner for a query. Takes ownership of rawCanonicalQuery.
@@ -62,8 +64,10 @@ namespace mongo {
* work to obtain the Collection has already been done by the caller. The 'collection'
* argument may be NULL.
*/
- Status getRunner(Collection* collection, CanonicalQuery* rawCanonicalQuery,
- Runner** out, size_t plannerOptions = 0);
+ Status getRunner(Collection* collection,
+ CanonicalQuery* rawCanonicalQuery,
+ Runner** out,
+ size_t plannerOptions = 0);
/**
* Gets a runner for a query described as an unparsed BSON object over the named and optionally
@@ -79,6 +83,7 @@ namespace mongo {
Status getRunner(Collection* collection, const std::string& ns, const BSONObj& unparsedQuery,
Runner** outRunner, CanonicalQuery** outCanonicalQuery,
size_t plannerOptions = 0);
+
/*
* Get a runner for a query executing as part of a distinct command.
*
@@ -90,6 +95,17 @@ namespace mongo {
const BSONObj& query,
const std::string& field,
Runner** out);
+ /*
+ * Get a runner for a query executing as part of a count command.
+ *
+ * Count doesn't care about actually examining its results; it just wants to walk through them.
+ * As such, with certain covered queries, we can skip the overhead of fetching etc. when
+ * executing a count.
+ */
+ Status getRunnerCount(Collection* collection,
+ const BSONObj& query,
+ const BSONObj& hintObj,
+ Runner** out);
/**
* RAII approach to ensuring that runners are deregistered in newRunQuery.
diff --git a/src/mongo/db/query/interval.cpp b/src/mongo/db/query/interval.cpp
index 9e1c7972273..a587d64b312 100644
--- a/src/mongo/db/query/interval.cpp
+++ b/src/mongo/db/query/interval.cpp
@@ -134,6 +134,11 @@ namespace mongo {
return _intervalData.nFields() == 0;
}
+ // XXX: move the stuff in the anonymous namespace into 'this' and clean this file up in general.
+ bool Interval::equals(const Interval& other) const {
+ return exact(*this, other);
+ }
+
// TODO: shortcut number of comparisons
Interval::IntervalComparison Interval::compare(const Interval& other) const {
//
diff --git a/src/mongo/db/query/interval.h b/src/mongo/db/query/interval.h
index e74cabcee35..1b8e381bc17 100644
--- a/src/mongo/db/query/interval.h
+++ b/src/mongo/db/query/interval.h
@@ -98,6 +98,9 @@ namespace mongo {
return (!startInclusive || !endInclusive) && 0 == start.woCompare(end, false);
}
+ /** Returns true if 'this' is the same interval as 'other' */
+ bool equals(const Interval& other) const;
+
/**
* Swap start and end points of interval.
*/
diff --git a/src/mongo/db/query/plan_executor.cpp b/src/mongo/db/query/plan_executor.cpp
index c3c5a4fdd42..6bb69de0bfc 100644
--- a/src/mongo/db/query/plan_executor.cpp
+++ b/src/mongo/db/query/plan_executor.cpp
@@ -89,6 +89,13 @@ namespace mongo {
PlanStage::StageState code = _root->work(&id);
if (PlanStage::ADVANCED == code) {
+ // Fast count.
+ if (WorkingSet::INVALID_ID == id) {
+ invariant(NULL == objOut);
+ invariant(NULL == dlOut);
+ return Runner::RUNNER_ADVANCED;
+ }
+
WorkingSetMember* member = _workingSet->get(id);
bool hasRequestedData = true;
diff --git a/src/mongo/db/query/query_planner_params.h b/src/mongo/db/query/query_planner_params.h
index 791cd7c42c5..2ed98e552fe 100644
--- a/src/mongo/db/query/query_planner_params.h
+++ b/src/mongo/db/query/query_planner_params.h
@@ -68,6 +68,10 @@ namespace mongo {
// Set this if you want to try to keep documents deleted or mutated during the execution
// of the query in the query results.
KEEP_MUTATIONS = 1 << 5,
+
+ // Nobody should set this above the getRunner interface. Internal flag set as a hint to
+ // the planner that the caller is actually the count command.
+ PRIVATE_IS_COUNT = 1 << 6,
};
// See Options enum above.
diff --git a/src/mongo/db/query/query_solution.cpp b/src/mongo/db/query/query_solution.cpp
index 71ef22668f4..6866d017e4c 100644
--- a/src/mongo/db/query/query_solution.cpp
+++ b/src/mongo/db/query/query_solution.cpp
@@ -634,4 +634,19 @@ namespace mongo {
*ss << "bounds = " << bounds.toString() << '\n';
}
+ //
+ // CountNode
+ //
+
+ void CountNode::appendToString(mongoutils::str::stream* ss, int indent) const {
+ addIndent(ss, indent);
+ *ss << "COUNT\n";
+ addIndent(ss, indent + 1);
+ *ss << "keyPattern = " << indexKeyPattern << '\n';
+ addIndent(ss, indent + 1);
+ *ss << "startKey = " << startKey << '\n';
+ addIndent(ss, indent + 1);
+ *ss << "endKey = " << endKey << '\n';
+ }
+
} // namespace mongo
diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h
index 6c3964f86b5..ca0aaa24255 100644
--- a/src/mongo/db/query/query_solution.h
+++ b/src/mongo/db/query/query_solution.h
@@ -618,4 +618,30 @@ namespace mongo {
int fieldNo;
};
+ /**
+ * Some count queries reduce to counting how many keys are between two entries in a
+ * Btree.
+ */
+ struct CountNode : public QuerySolutionNode {
+ CountNode() { }
+ virtual ~CountNode() { }
+
+ virtual StageType getType() const { return STAGE_COUNT; }
+ virtual void appendToString(mongoutils::str::stream* ss, int indent) const;
+
+ bool fetched() const { return true; }
+ bool hasField(const string& field) const { return true; }
+ bool sortedByDiskLoc() const { return false; }
+ const BSONObjSet& getSort() const { return sorts; }
+ BSONObjSet sorts;
+
+ BSONObj indexKeyPattern;
+
+ BSONObj startKey;
+ bool startKeyInclusive;
+
+ BSONObj endKey;
+ bool endKeyInclusive;
+ };
+
} // namespace mongo
diff --git a/src/mongo/db/query/stage_builder.cpp b/src/mongo/db/query/stage_builder.cpp
index 9d4988b4783..395eee8525e 100644
--- a/src/mongo/db/query/stage_builder.cpp
+++ b/src/mongo/db/query/stage_builder.cpp
@@ -33,6 +33,7 @@
#include "mongo/db/exec/and_hash.h"
#include "mongo/db/exec/and_sorted.h"
#include "mongo/db/exec/collection_scan.h"
+#include "mongo/db/exec/count.h"
#include "mongo/db/exec/distinct_scan.h"
#include "mongo/db/exec/fetch.h"
#include "mongo/db/exec/index_scan.h"
@@ -279,6 +280,32 @@ namespace mongo {
params.fieldNo = dn->fieldNo;
return new DistinctScan(params, ws);
}
+ else if (STAGE_COUNT == root->getType()) {
+ const CountNode* cn = static_cast<const CountNode*>(root);
+
+ Database* db = cc().database();
+ if (NULL == db) {
+ warning() << "Can't fast-count null ns (db null)" << qsol.ns << endl;
+ return NULL;
+ }
+
+ Collection* collection = db ? db->getCollection(qsol.ns) : NULL;
+ if (NULL == collection) {
+ warning() << "Can't fast-count null ns (coll null)" << qsol.ns << endl;
+ return NULL;
+ }
+
+ CountParams params;
+
+ params.descriptor =
+ collection->getIndexCatalog()->findIndexByKeyPattern(cn->indexKeyPattern);
+ params.startKey = cn->startKey;
+ params.startKeyInclusive = cn->startKeyInclusive;
+ params.endKey = cn->endKey;
+ params.endKeyInclusive = cn->endKeyInclusive;
+
+ return new Count(params, ws);
+ }
else {
mongoutils::str::stream ss;
root->appendToString(&ss, 0);
diff --git a/src/mongo/db/query/stage_types.h b/src/mongo/db/query/stage_types.h
index ecbacb9068a..cb1ffb391d1 100644
--- a/src/mongo/db/query/stage_types.h
+++ b/src/mongo/db/query/stage_types.h
@@ -38,6 +38,11 @@ namespace mongo {
STAGE_AND_SORTED,
STAGE_COLLSCAN,
+ // If we're running a .count(), the query is fully covered by one ixscan, and the ixscan is
+ // from one key to another, we can just skip through the keys without bothering to examine
+ // them.
+ STAGE_COUNT,
+
// If we're running a distinct, we only care about one value for each key. The distinct
// stage is an ixscan with some key-skipping behvaior that only distinct uses.
STAGE_DISTINCT,