summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHari Khalsa <hkhalsa@10gen.com>2014-03-20 14:57:34 -0400
committerHari Khalsa <hkhalsa@10gen.com>2014-03-21 12:57:54 -0400
commit0966b812bd884607768a521c46fe9e88c00b89fb (patch)
treed11cd24a2a737eca9c5f5884037dd5eff8deedde
parent52a9e923df6a67a5849e77c460079c0c57bfdfd0 (diff)
downloadmongo-0966b812bd884607768a521c46fe9e88c00b89fb.tar.gz
SERVER-13292 fast path simple/fully covered projections
-rw-r--r--src/mongo/db/exec/projection.cpp137
-rw-r--r--src/mongo/db/exec/projection.h56
-rw-r--r--src/mongo/db/exec/projection_exec.cpp36
-rw-r--r--src/mongo/db/query/parsed_projection.cpp13
-rw-r--r--src/mongo/db/query/parsed_projection.h6
-rw-r--r--src/mongo/db/query/planner_analysis.cpp49
-rw-r--r--src/mongo/db/query/query_solution.cpp11
-rw-r--r--src/mongo/db/query/query_solution.h31
-rw-r--r--src/mongo/db/query/stage_builder.cpp20
9 files changed, 321 insertions, 38 deletions
diff --git a/src/mongo/db/exec/projection.cpp b/src/mongo/db/exec/projection.cpp
index 1a094011193..06acc8b789a 100644
--- a/src/mongo/db/exec/projection.cpp
+++ b/src/mongo/db/exec/projection.cpp
@@ -37,13 +37,137 @@
namespace mongo {
- ProjectionStage::ProjectionStage(BSONObj projObj,
- const MatchExpression* fullExpression,
+ static const char* kIdField = "_id";
+
+ ProjectionStage::ProjectionStage(const ProjectionStageParams& params,
WorkingSet* ws,
PlanStage* child)
- : _exec(new ProjectionExec(projObj, fullExpression)),
- _ws(ws),
- _child(child) { }
+ : _ws(ws),
+ _child(child),
+ _projImpl(params.projImpl) {
+
+ if (ProjectionStageParams::NO_FAST_PATH == _projImpl) {
+ _exec.reset(new ProjectionExec(params.projObj, params.fullExpression));
+ }
+ else {
+ // We shouldn't need the full expression if we're fast-pathing.
+ invariant(NULL == params.fullExpression);
+
+ _projObj = params.projObj;
+
+ // Sanity-check the input.
+ invariant(_projObj.isOwned());
+ invariant(!_projObj.isEmpty());
+
+ // The _id is included by default.
+ bool includeId = true;
+
+ // Figure out what fields are in the projection. TODO: we can get this from the
+ // ParsedProjection...modify that to have this type instead of a vector.
+ BSONObjIterator projObjIt(_projObj);
+ while (projObjIt.more()) {
+ BSONElement elt = projObjIt.next();
+ // Must deal with the _id case separately as there is an implicit _id: 1 in the
+ // projection.
+ if (mongoutils::str::equals(elt.fieldName(), kIdField)
+ && !elt.trueValue()) {
+ includeId = false;
+ continue;
+ }
+ _includedFields.insert(elt.fieldNameStringData());
+ }
+
+ if (includeId) {
+ _includedFields.insert(kIdField);
+ }
+
+ // If we're pulling data out of one index we can pre-compute the indices of the fields
+ // in the key that we pull data from and avoid looking up the field name each time.
+ if (ProjectionStageParams::COVERED_ONE_INDEX == params.projImpl) {
+ // Sanity-check.
+ _coveredKeyObj = params.coveredKeyObj;
+ invariant(_coveredKeyObj.isOwned());
+
+ BSONObjIterator kpIt(_coveredKeyObj);
+ while (kpIt.more()) {
+ BSONElement elt = kpIt.next();
+ unordered_set<StringData, StringData::Hasher>::iterator fieldIt;
+ fieldIt = _includedFields.find(elt.fieldNameStringData());
+
+ if (_includedFields.end() == fieldIt) {
+ // Push an unused value on the back to keep _includeKey and _keyFieldNames
+ // in sync.
+ _keyFieldNames.push_back(StringData());
+ _includeKey.push_back(false);
+ }
+ else {
+ // If we are including this key field store its field name.
+ _keyFieldNames.push_back(*fieldIt);
+ _includeKey.push_back(true);
+ }
+ }
+ }
+ else {
+ invariant(ProjectionStageParams::SIMPLE_DOC == params.projImpl);
+ }
+ }
+ }
+
+ Status ProjectionStage::transform(WorkingSetMember* member) {
+ // The default no-fast-path case.
+ if (ProjectionStageParams::NO_FAST_PATH == _projImpl) {
+ return _exec->transform(member);
+ }
+
+ BSONObjBuilder bob;
+
+ // Note that even if our fast path analysis is bug-free something that is
+ // covered might be invalidated and just be an obj. In this case we just go
+ // through the SIMPLE_DOC path which is still correct if the covered data
+ // is not available.
+ //
+ // SIMPLE_DOC implies that we expect an object so it's kind of redundant.
+ if ((ProjectionStageParams::SIMPLE_DOC == _projImpl) || member->hasObj()) {
+ // If we got here because of SIMPLE_DOC the planner shouldn't have messed up.
+ invariant(member->hasObj());
+
+ // Look at every field in the source document and see if we're including it.
+ BSONObjIterator inputIt(member->obj);
+ while (inputIt.more()) {
+ BSONElement elt = inputIt.next();
+ unordered_set<StringData, StringData::Hasher>::iterator fieldIt;
+ fieldIt = _includedFields.find(elt.fieldNameStringData());
+ if (_includedFields.end() != fieldIt) {
+ // If so, add it to the builder.
+ bob.append(elt);
+ }
+ }
+ }
+ else {
+ invariant(ProjectionStageParams::COVERED_ONE_INDEX == _projImpl);
+ // We're pulling data out of the key.
+ invariant(1 == member->keyData.size());
+ size_t keyIndex = 0;
+
+ // Look at every key element...
+ BSONObjIterator keyIterator(member->keyData[0].keyData);
+ while (keyIterator.more()) {
+ BSONElement elt = keyIterator.next();
+ // If we're supposed to include it...
+ if (_includeKey[keyIndex]) {
+ // Do so.
+ bob.appendAs(elt, _keyFieldNames[keyIndex]);
+ }
+ ++keyIndex;
+ }
+ }
+
+ member->state = WorkingSetMember::OWNED_OBJ;
+ member->keyData.clear();
+ member->loc = DiskLoc();
+ member->obj = bob.obj();
+ return Status::OK();
+ }
ProjectionStage::~ProjectionStage() { }
@@ -59,7 +183,8 @@ namespace mongo {
// tailable cursor and isEOF() would be true even if it had more data...
if (PlanStage::ADVANCED == status) {
WorkingSetMember* member = _ws->get(id);
- Status projStatus = _exec->transform(member);
+ // Punt to our specific projection impl.
+ Status projStatus = transform(member);
if (!projStatus.isOK()) {
warning() << "Couldn't execute projection, status = "
<< projStatus.toString() << endl;
diff --git a/src/mongo/db/exec/projection.h b/src/mongo/db/exec/projection.h
index c558b8e434a..14420fccca0 100644
--- a/src/mongo/db/exec/projection.h
+++ b/src/mongo/db/exec/projection.h
@@ -36,13 +36,41 @@
namespace mongo {
+ struct ProjectionStageParams {
+ enum ProjectionImplementation {
+ // The default case. Will handle every projection.
+ NO_FAST_PATH,
+
+ // The projection is simple inclusion and is totally covered by one index.
+ COVERED_ONE_INDEX,
+
+ // The projection is simple inclusion and we expect an object.
+ SIMPLE_DOC
+ };
+
+ ProjectionStageParams() : projImpl(NO_FAST_PATH), fullExpression(NULL) { }
+
+ ProjectionImplementation projImpl;
+
+ // The projection object. We lack a ProjectionExpression or similar so we use a BSONObj.
+ BSONObj projObj;
+
+ // If we have a positional or elemMatch projection we need a MatchExpression to pull out the
+ // right data.
+ // Not owned here, we do not take ownership.
+ const MatchExpression* fullExpression;
+
+ // If (COVERED_ONE_INDEX == projObj) this is the key pattern we're extracting covered data
+ // from. Otherwise, this field is ignored.
+ BSONObj coveredKeyObj;
+ };
+
/**
* This stage computes a projection.
*/
class ProjectionStage : public PlanStage {
public:
- ProjectionStage(BSONObj projObj,
- const MatchExpression* fullExpression,
+ ProjectionStage(const ProjectionStageParams& params,
WorkingSet* ws,
PlanStage* child);
@@ -58,6 +86,8 @@ namespace mongo {
PlanStageStats* getStats();
private:
+ Status transform(WorkingSetMember* member);
+
scoped_ptr<ProjectionExec> _exec;
// _ws is not owned by us.
@@ -66,6 +96,28 @@ namespace mongo {
// Stats
CommonStats _commonStats;
+
+ // Fast paths:
+ ProjectionStageParams::ProjectionImplementation _projImpl;
+
+ // Used by all projection implementations.
+ BSONObj _projObj;
+
+ // Data used for both SIMPLE_DOC and COVERED_ONE_INDEX paths.
+ // Has the field names present in the simple projection.
+ unordered_set<StringData, StringData::Hasher> _includedFields;
+
+ //
+ // Used for the COVERED_ONE_INDEX path.
+ //
+ BSONObj _coveredKeyObj;
+
+ // Field names can be empty in 2.4 and before so we can't use them as a sentinel value.
+ // If the i-th entry is true we include the i-th field in the key.
+ vector<bool> _includeKey;
+
+ // If the i-th entry of _includeKey is true this is the field name for the i-th key field.
+ vector<StringData> _keyFieldNames;
};
} // namespace mongo
diff --git a/src/mongo/db/exec/projection_exec.cpp b/src/mongo/db/exec/projection_exec.cpp
index 15a224cbe72..afa60132631 100644
--- a/src/mongo/db/exec/projection_exec.cpp
+++ b/src/mongo/db/exec/projection_exec.cpp
@@ -249,7 +249,23 @@ namespace mongo {
}
BSONObjBuilder bob;
- if (!requiresDocument()) {
+ if (member->hasObj()) {
+ MatchDetails matchDetails;
+
+ // If it's a positional projection we need a MatchDetails.
+ if (transformRequiresDetails()) {
+ matchDetails.requestElemMatchKey();
+ verify(NULL != _queryExpression);
+ verify(_queryExpression->matchesBSON(member->obj, &matchDetails));
+ }
+
+ Status projStatus = transform(member->obj, &bob, &matchDetails);
+ if (!projStatus.isOK()) {
+ return projStatus;
+ }
+ }
+ else {
+ verify(!requiresDocument());
// Go field by field.
if (_includeID) {
BSONElement elt;
@@ -273,24 +289,6 @@ namespace mongo {
}
}
}
- else {
- // Planner should have done this.
- verify(member->hasObj());
-
- MatchDetails matchDetails;
-
- // If it's a positional projection we need a MatchDetails.
- if (transformRequiresDetails()) {
- matchDetails.requestElemMatchKey();
- verify(NULL != _queryExpression);
- verify(_queryExpression->matchesBSON(member->obj, &matchDetails));
- }
-
- Status projStatus = transform(member->obj, &bob, &matchDetails);
- if (!projStatus.isOK()) {
- return projStatus;
- }
- }
for (MetaMap::const_iterator it = _meta.begin(); it != _meta.end(); ++it) {
if (META_GEONEAR_DIST == it->second) {
diff --git a/src/mongo/db/query/parsed_projection.cpp b/src/mongo/db/query/parsed_projection.cpp
index 06a8006cef8..a30623e36e6 100644
--- a/src/mongo/db/query/parsed_projection.cpp
+++ b/src/mongo/db/query/parsed_projection.cpp
@@ -237,13 +237,7 @@ namespace mongo {
// Save the raw spec. It should be owned by the LiteParsedQuery.
verify(spec.isOwned());
pp->_source = spec;
-
- // returnKey clobbers everything.
- if (hasIndexKeyProjection) {
- pp->_requiresDocument = false;
- *out = pp.release();
- return Status::OK();
- }
+ pp->_returnKey = hasIndexKeyProjection;
// Dotted fields aren't covered, non-simple require match details, and as for include, "if
// we default to including then we can't use an index because we don't know what we're
@@ -276,6 +270,11 @@ namespace mongo {
}
}
+ // returnKey clobbers everything.
+ if (hasIndexKeyProjection) {
+ pp->_requiresDocument = false;
+ }
+
*out = pp.release();
return Status::OK();
}
diff --git a/src/mongo/db/query/parsed_projection.h b/src/mongo/db/query/parsed_projection.h
index 19f48a7819b..1d94057dff6 100644
--- a/src/mongo/db/query/parsed_projection.h
+++ b/src/mongo/db/query/parsed_projection.h
@@ -83,6 +83,10 @@ namespace mongo {
return _wantGeoNearPoint;
}
+ bool wantIndexKey() const {
+ return _returnKey;
+ }
+
private:
/**
* Must go through ::make
@@ -116,6 +120,8 @@ namespace mongo {
bool _wantGeoNearDistance;
bool _wantGeoNearPoint;
+
+ bool _returnKey;
};
} // namespace mongo
diff --git a/src/mongo/db/query/planner_analysis.cpp b/src/mongo/db/query/planner_analysis.cpp
index 5eebbb2c8a3..b9ee0f44613 100644
--- a/src/mongo/db/query/planner_analysis.cpp
+++ b/src/mongo/db/query/planner_analysis.cpp
@@ -542,6 +542,10 @@ namespace mongo {
if (NULL != query.getProj()) {
QLOG() << "PROJECTION: fetched status: " << solnRoot->fetched() << endl;
QLOG() << "PROJECTION: Current plan is:\n" << solnRoot->toString() << endl;
+
+ ProjectionNode::ProjectionType projType = ProjectionNode::DEFAULT;
+ BSONObj coveredKeyObj;
+
if (query.getProj()->requiresDocument()) {
QLOG() << "PROJECTION: claims to require doc adding fetch.\n";
// If the projection requires the entire document, somebody must fetch.
@@ -551,7 +555,10 @@ namespace mongo {
solnRoot = fetch;
}
}
- else {
+ else if (!query.getProj()->wantIndexKey()) {
+ // The only way we're here is if it's a simple projection. That is, we can pick out
+ // the fields we want to include and they're not dotted. So we want to execute the
+ // projection in the fast-path simple fashion. Just don't know which fast path yet.
QLOG() << "PROJECTION: requires fields\n";
const vector<string>& fields = query.getProj()->getRequiredFields();
bool covered = true;
@@ -563,13 +570,51 @@ namespace mongo {
break;
}
}
+
QLOG() << "PROJECTION: is covered?: = " << covered << endl;
+
// If any field is missing from the list of fields the projection wants,
// a fetch is required.
if (!covered) {
FetchNode* fetch = new FetchNode();
fetch->children.push_back(solnRoot);
solnRoot = fetch;
+
+ // It's simple but we'll have the full document and we should just iterate
+ // over that.
+ projType = ProjectionNode::SIMPLE_DOC;
+ QLOG() << "PROJECTION: not covered, fetching.";
+ }
+ else {
+ if (solnRoot->fetched()) {
+ // Fetched implies hasObj() so let's run with that.
+ projType = ProjectionNode::SIMPLE_DOC;
+ QLOG() << "PROJECTION: covered via FETCH, using SIMPLE_DOC fast path";
+ }
+ else {
+ // If we're here we're not fetched so we're covered. Let's see if we can
+ // get out of using the default projType. If there's only one leaf
+ // underneath and it's giving us index data we can use the faster covered
+ // impl.
+ vector<QuerySolutionNode*> leafNodes;
+ getLeafNodes(solnRoot, &leafNodes);
+
+ if (1 == leafNodes.size()) {
+ // Both the IXSCAN and DISTINCT stages provide covered key data.
+ if (STAGE_IXSCAN == leafNodes[0]->getType()) {
+ projType = ProjectionNode::COVERED_ONE_INDEX;
+ IndexScanNode* ixn = static_cast<IndexScanNode*>(leafNodes[0]);
+ coveredKeyObj = ixn->indexKeyPattern;
+ QLOG() << "PROJECTION: covered via IXSCAN, using COVERED fast path";
+ }
+ else if (STAGE_DISTINCT == leafNodes[0]->getType()) {
+ projType = ProjectionNode::COVERED_ONE_INDEX;
+ DistinctNode* dn = static_cast<DistinctNode*>(leafNodes[0]);
+ coveredKeyObj = dn->indexKeyPattern;
+ QLOG() << "PROJECTION: covered via DISTINCT, using COVERED fast path";
+ }
+ }
+ }
}
}
@@ -578,6 +623,8 @@ namespace mongo {
projNode->children.push_back(solnRoot);
projNode->fullExpression = query.root();
projNode->projection = query.getParsed().getProj();
+ projNode->projType = projType;
+ projNode->coveredKeyObj = coveredKeyObj;
solnRoot = projNode;
}
else {
diff --git a/src/mongo/db/query/query_solution.cpp b/src/mongo/db/query/query_solution.cpp
index 36c416ca5aa..5df771b491d 100644
--- a/src/mongo/db/query/query_solution.cpp
+++ b/src/mongo/db/query/query_solution.cpp
@@ -561,6 +561,17 @@ namespace mongo {
*ss << "PROJ\n";
addIndent(ss, indent + 1);
*ss << "proj = " << projection.toString() << '\n';
+ addIndent(ss, indent + 1);
+ if (DEFAULT == projType) {
+ *ss << "type = DEFAULT\n";
+ }
+ else if (COVERED_ONE_INDEX == projType) {
+ *ss << "type = COVERED_ONE_INDEX\n";
+ }
+ else {
+ invariant(SIMPLE_DOC == projType);
+ *ss << "type = SIMPLE_DOC\n";
+ }
addCommon(ss, indent);
addIndent(ss, indent + 1);
*ss << "Child:" << '\n';
diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h
index 70efebcc4c5..b5ca922eb1a 100644
--- a/src/mongo/db/query/query_solution.h
+++ b/src/mongo/db/query/query_solution.h
@@ -419,7 +419,26 @@ namespace mongo {
};
struct ProjectionNode : public QuerySolutionNode {
- ProjectionNode() { }
+ /**
+ * We have a few implementations of the projection functionality. The most general
+ * implementation 'DEFAULT' is much slower than the fast-path implementations
+ * below. We only really have all the information available to choose a projection
+ * implementation at planning time.
+ */
+ enum ProjectionType {
+ // This is the most general implementation of the projection functionality. It handles
+ // every case.
+ DEFAULT,
+
+ // This is a fast-path for when the projection is fully covered by one index.
+ COVERED_ONE_INDEX,
+
+ // This is a fast-path for when the projection only has inclusions on non-dotted fields.
+ SIMPLE_DOC,
+ };
+
+ ProjectionNode() : fullExpression(NULL), projType(DEFAULT) { }
+
virtual ~ProjectionNode() { }
virtual StageType getType() const { return STAGE_PROJECTION; }
@@ -466,6 +485,14 @@ namespace mongo {
// Given that we don't yet have a MatchExpression analogue for the expression language, we
// use a BSONObj.
BSONObj projection;
+
+ // What implementation of the projection algorithm should we use?
+ ProjectionType projType;
+
+ // Only meaningful if projType == COVERED_ONE_INDEX. This is the key pattern of the index
+ // supplying our covered data. We can pre-compute which fields to include and cache that
+ // data for later if we know we only have one index.
+ BSONObj coveredKeyObj;
};
struct SortNode : public QuerySolutionNode {
@@ -676,7 +703,7 @@ namespace mongo {
// This stage is created "on top" of normal planning and as such the properties
// below don't really matter.
- bool fetched() const { return true; }
+ bool fetched() const { return false; }
bool hasField(const string& field) const { return !indexKeyPattern[field].eoo(); }
bool sortedByDiskLoc() const { return false; }
const BSONObjSet& getSort() const { return sorts; }
diff --git a/src/mongo/db/query/stage_builder.cpp b/src/mongo/db/query/stage_builder.cpp
index d7f7099d508..697dfbbc227 100644
--- a/src/mongo/db/query/stage_builder.cpp
+++ b/src/mongo/db/query/stage_builder.cpp
@@ -109,7 +109,25 @@ namespace mongo {
const ProjectionNode* pn = static_cast<const ProjectionNode*>(root);
PlanStage* childStage = buildStages(qsol, pn->children[0], ws);
if (NULL == childStage) { return NULL; }
- return new ProjectionStage(pn->projection, pn->fullExpression, ws, childStage);
+ ProjectionStageParams params;
+ params.projObj = pn->projection;
+
+ // Stuff the right data into the params depending on what proj impl we use.
+ if (ProjectionNode::DEFAULT == pn->projType) {
+ params.fullExpression = pn->fullExpression;
+ params.projImpl = ProjectionStageParams::NO_FAST_PATH;
+ }
+ else if (ProjectionNode::COVERED_ONE_INDEX == pn->projType) {
+ params.projImpl = ProjectionStageParams::COVERED_ONE_INDEX;
+ params.coveredKeyObj = pn->coveredKeyObj;
+ invariant(!pn->coveredKeyObj.isEmpty());
+ }
+ else {
+ invariant(ProjectionNode::SIMPLE_DOC == pn->projType);
+ params.projImpl = ProjectionStageParams::SIMPLE_DOC;
+ }
+
+ return new ProjectionStage(params, ws, childStage);
}
else if (STAGE_LIMIT == root->getType()) {
const LimitNode* ln = static_cast<const LimitNode*>(root);