diff options
author | Hari Khalsa <hkhalsa@10gen.com> | 2014-03-20 14:57:34 -0400 |
---|---|---|
committer | Hari Khalsa <hkhalsa@10gen.com> | 2014-03-21 12:57:28 -0400 |
commit | a79c8cfc2f3d778c9aa11d782b76adeec4624b37 (patch) | |
tree | 2af73c492bf8eb2147ac9faebc6c674c1088d40a /src/mongo | |
parent | 3792e501d812a51b796cc670aaf3e3b8c2439682 (diff) | |
download | mongo-a79c8cfc2f3d778c9aa11d782b76adeec4624b37.tar.gz |
SERVER-13292 fast path simple/fully covered projections
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/db/exec/projection.cpp | 137 | ||||
-rw-r--r-- | src/mongo/db/exec/projection.h | 56 | ||||
-rw-r--r-- | src/mongo/db/exec/projection_exec.cpp | 36 | ||||
-rw-r--r-- | src/mongo/db/query/parsed_projection.cpp | 13 | ||||
-rw-r--r-- | src/mongo/db/query/parsed_projection.h | 6 | ||||
-rw-r--r-- | src/mongo/db/query/planner_analysis.cpp | 49 | ||||
-rw-r--r-- | src/mongo/db/query/query_solution.cpp | 11 | ||||
-rw-r--r-- | src/mongo/db/query/query_solution.h | 31 | ||||
-rw-r--r-- | src/mongo/db/query/stage_builder.cpp | 20 |
9 files changed, 321 insertions, 38 deletions
diff --git a/src/mongo/db/exec/projection.cpp b/src/mongo/db/exec/projection.cpp index 1a094011193..06acc8b789a 100644 --- a/src/mongo/db/exec/projection.cpp +++ b/src/mongo/db/exec/projection.cpp @@ -37,13 +37,137 @@ namespace mongo { - ProjectionStage::ProjectionStage(BSONObj projObj, - const MatchExpression* fullExpression, + static const char* kIdField = "_id"; + + ProjectionStage::ProjectionStage(const ProjectionStageParams& params, WorkingSet* ws, PlanStage* child) - : _exec(new ProjectionExec(projObj, fullExpression)), - _ws(ws), - _child(child) { } + : _ws(ws), + _child(child), + _projImpl(params.projImpl) { + + if (ProjectionStageParams::NO_FAST_PATH == _projImpl) { + _exec.reset(new ProjectionExec(params.projObj, params.fullExpression)); + } + else { + // We shouldn't need the full expression if we're fast-pathing. + invariant(NULL == params.fullExpression); + + _projObj = params.projObj; + + // Sanity-check the input. + invariant(_projObj.isOwned()); + invariant(!_projObj.isEmpty()); + + // The _id is included by default. + bool includeId = true; + + // Figure out what fields are in the projection. TODO: we can get this from the + // ParsedProjection...modify that to have this type instead of a vector. + BSONObjIterator projObjIt(_projObj); + while (projObjIt.more()) { + BSONElement elt = projObjIt.next(); + // Must deal with the _id case separately as there is an implicit _id: 1 in the + // projection. + if (mongoutils::str::equals(elt.fieldName(), kIdField) + && !elt.trueValue()) { + includeId = false; + continue; + } + _includedFields.insert(elt.fieldNameStringData()); + } + + if (includeId) { + _includedFields.insert(kIdField); + } + + // If we're pulling data out of one index we can pre-compute the indices of the fields + // in the key that we pull data from and avoid looking up the field name each time. + if (ProjectionStageParams::COVERED_ONE_INDEX == params.projImpl) { + // Sanity-check. + _coveredKeyObj = params.coveredKeyObj; + invariant(_coveredKeyObj.isOwned()); + + BSONObjIterator kpIt(_coveredKeyObj); + while (kpIt.more()) { + BSONElement elt = kpIt.next(); + unordered_set<StringData, StringData::Hasher>::iterator fieldIt; + fieldIt = _includedFields.find(elt.fieldNameStringData()); + + if (_includedFields.end() == fieldIt) { + // Push an unused value on the back to keep _includeKey and _keyFieldNames + // in sync. + _keyFieldNames.push_back(StringData()); + _includeKey.push_back(false); + } + else { + // If we are including this key field store its field name. + _keyFieldNames.push_back(*fieldIt); + _includeKey.push_back(true); + } + } + } + else { + invariant(ProjectionStageParams::SIMPLE_DOC == params.projImpl); + } + } + } + + Status ProjectionStage::transform(WorkingSetMember* member) { + // The default no-fast-path case. + if (ProjectionStageParams::NO_FAST_PATH == _projImpl) { + return _exec->transform(member); + } + + BSONObjBuilder bob; + + // Note that even if our fast path analysis is bug-free something that is + // covered might be invalidated and just be an obj. In this case we just go + // through the SIMPLE_DOC path which is still correct if the covered data + // is not available. + // + // SIMPLE_DOC implies that we expect an object so it's kind of redundant. + if ((ProjectionStageParams::SIMPLE_DOC == _projImpl) || member->hasObj()) { + // If we got here because of SIMPLE_DOC the planner shouldn't have messed up. + invariant(member->hasObj()); + + // Look at every field in the source document and see if we're including it. + BSONObjIterator inputIt(member->obj); + while (inputIt.more()) { + BSONElement elt = inputIt.next(); + unordered_set<StringData, StringData::Hasher>::iterator fieldIt; + fieldIt = _includedFields.find(elt.fieldNameStringData()); + if (_includedFields.end() != fieldIt) { + // If so, add it to the builder. + bob.append(elt); + } + } + } + else { + invariant(ProjectionStageParams::COVERED_ONE_INDEX == _projImpl); + // We're pulling data out of the key. + invariant(1 == member->keyData.size()); + size_t keyIndex = 0; + + // Look at every key element... + BSONObjIterator keyIterator(member->keyData[0].keyData); + while (keyIterator.more()) { + BSONElement elt = keyIterator.next(); + // If we're supposed to include it... + if (_includeKey[keyIndex]) { + // Do so. + bob.appendAs(elt, _keyFieldNames[keyIndex]); + } + ++keyIndex; + } + } + + member->state = WorkingSetMember::OWNED_OBJ; + member->keyData.clear(); + member->loc = DiskLoc(); + member->obj = bob.obj(); + return Status::OK(); + } ProjectionStage::~ProjectionStage() { } @@ -59,7 +183,8 @@ namespace mongo { // tailable cursor and isEOF() would be true even if it had more data... if (PlanStage::ADVANCED == status) { WorkingSetMember* member = _ws->get(id); - Status projStatus = _exec->transform(member); + // Punt to our specific projection impl. + Status projStatus = transform(member); if (!projStatus.isOK()) { warning() << "Couldn't execute projection, status = " << projStatus.toString() << endl; diff --git a/src/mongo/db/exec/projection.h b/src/mongo/db/exec/projection.h index c558b8e434a..14420fccca0 100644 --- a/src/mongo/db/exec/projection.h +++ b/src/mongo/db/exec/projection.h @@ -36,13 +36,41 @@ namespace mongo { + struct ProjectionStageParams { + enum ProjectionImplementation { + // The default case. Will handle every projection. + NO_FAST_PATH, + + // The projection is simple inclusion and is totally covered by one index. + COVERED_ONE_INDEX, + + // The projection is simple inclusion and we expect an object. + SIMPLE_DOC + }; + + ProjectionStageParams() : projImpl(NO_FAST_PATH), fullExpression(NULL) { } + + ProjectionImplementation projImpl; + + // The projection object. We lack a ProjectionExpression or similar so we use a BSONObj. + BSONObj projObj; + + // If we have a positional or elemMatch projection we need a MatchExpression to pull out the + // right data. + // Not owned here, we do not take ownership. + const MatchExpression* fullExpression; + + // If (COVERED_ONE_INDEX == projObj) this is the key pattern we're extracting covered data + // from. Otherwise, this field is ignored. + BSONObj coveredKeyObj; + }; + /** * This stage computes a projection. */ class ProjectionStage : public PlanStage { public: - ProjectionStage(BSONObj projObj, - const MatchExpression* fullExpression, + ProjectionStage(const ProjectionStageParams& params, WorkingSet* ws, PlanStage* child); @@ -58,6 +86,8 @@ namespace mongo { PlanStageStats* getStats(); private: + Status transform(WorkingSetMember* member); + scoped_ptr<ProjectionExec> _exec; // _ws is not owned by us. @@ -66,6 +96,28 @@ namespace mongo { // Stats CommonStats _commonStats; + + // Fast paths: + ProjectionStageParams::ProjectionImplementation _projImpl; + + // Used by all projection implementations. + BSONObj _projObj; + + // Data used for both SIMPLE_DOC and COVERED_ONE_INDEX paths. + // Has the field names present in the simple projection. + unordered_set<StringData, StringData::Hasher> _includedFields; + + // + // Used for the COVERED_ONE_INDEX path. + // + BSONObj _coveredKeyObj; + + // Field names can be empty in 2.4 and before so we can't use them as a sentinel value. + // If the i-th entry is true we include the i-th field in the key. + vector<bool> _includeKey; + + // If the i-th entry of _includeKey is true this is the field name for the i-th key field. + vector<StringData> _keyFieldNames; }; } // namespace mongo diff --git a/src/mongo/db/exec/projection_exec.cpp b/src/mongo/db/exec/projection_exec.cpp index 15a224cbe72..afa60132631 100644 --- a/src/mongo/db/exec/projection_exec.cpp +++ b/src/mongo/db/exec/projection_exec.cpp @@ -249,7 +249,23 @@ namespace mongo { } BSONObjBuilder bob; - if (!requiresDocument()) { + if (member->hasObj()) { + MatchDetails matchDetails; + + // If it's a positional projection we need a MatchDetails. + if (transformRequiresDetails()) { + matchDetails.requestElemMatchKey(); + verify(NULL != _queryExpression); + verify(_queryExpression->matchesBSON(member->obj, &matchDetails)); + } + + Status projStatus = transform(member->obj, &bob, &matchDetails); + if (!projStatus.isOK()) { + return projStatus; + } + } + else { + verify(!requiresDocument()); // Go field by field. if (_includeID) { BSONElement elt; @@ -273,24 +289,6 @@ namespace mongo { } } } - else { - // Planner should have done this. - verify(member->hasObj()); - - MatchDetails matchDetails; - - // If it's a positional projection we need a MatchDetails. - if (transformRequiresDetails()) { - matchDetails.requestElemMatchKey(); - verify(NULL != _queryExpression); - verify(_queryExpression->matchesBSON(member->obj, &matchDetails)); - } - - Status projStatus = transform(member->obj, &bob, &matchDetails); - if (!projStatus.isOK()) { - return projStatus; - } - } for (MetaMap::const_iterator it = _meta.begin(); it != _meta.end(); ++it) { if (META_GEONEAR_DIST == it->second) { diff --git a/src/mongo/db/query/parsed_projection.cpp b/src/mongo/db/query/parsed_projection.cpp index 06a8006cef8..a30623e36e6 100644 --- a/src/mongo/db/query/parsed_projection.cpp +++ b/src/mongo/db/query/parsed_projection.cpp @@ -237,13 +237,7 @@ namespace mongo { // Save the raw spec. It should be owned by the LiteParsedQuery. verify(spec.isOwned()); pp->_source = spec; - - // returnKey clobbers everything. - if (hasIndexKeyProjection) { - pp->_requiresDocument = false; - *out = pp.release(); - return Status::OK(); - } + pp->_returnKey = hasIndexKeyProjection; // Dotted fields aren't covered, non-simple require match details, and as for include, "if // we default to including then we can't use an index because we don't know what we're @@ -276,6 +270,11 @@ namespace mongo { } } + // returnKey clobbers everything. + if (hasIndexKeyProjection) { + pp->_requiresDocument = false; + } + *out = pp.release(); return Status::OK(); } diff --git a/src/mongo/db/query/parsed_projection.h b/src/mongo/db/query/parsed_projection.h index 19f48a7819b..1d94057dff6 100644 --- a/src/mongo/db/query/parsed_projection.h +++ b/src/mongo/db/query/parsed_projection.h @@ -83,6 +83,10 @@ namespace mongo { return _wantGeoNearPoint; } + bool wantIndexKey() const { + return _returnKey; + } + private: /** * Must go through ::make @@ -116,6 +120,8 @@ namespace mongo { bool _wantGeoNearDistance; bool _wantGeoNearPoint; + + bool _returnKey; }; } // namespace mongo diff --git a/src/mongo/db/query/planner_analysis.cpp b/src/mongo/db/query/planner_analysis.cpp index 5eebbb2c8a3..b9ee0f44613 100644 --- a/src/mongo/db/query/planner_analysis.cpp +++ b/src/mongo/db/query/planner_analysis.cpp @@ -542,6 +542,10 @@ namespace mongo { if (NULL != query.getProj()) { QLOG() << "PROJECTION: fetched status: " << solnRoot->fetched() << endl; QLOG() << "PROJECTION: Current plan is:\n" << solnRoot->toString() << endl; + + ProjectionNode::ProjectionType projType = ProjectionNode::DEFAULT; + BSONObj coveredKeyObj; + if (query.getProj()->requiresDocument()) { QLOG() << "PROJECTION: claims to require doc adding fetch.\n"; // If the projection requires the entire document, somebody must fetch. @@ -551,7 +555,10 @@ namespace mongo { solnRoot = fetch; } } - else { + else if (!query.getProj()->wantIndexKey()) { + // The only way we're here is if it's a simple projection. That is, we can pick out + // the fields we want to include and they're not dotted. So we want to execute the + // projection in the fast-path simple fashion. Just don't know which fast path yet. QLOG() << "PROJECTION: requires fields\n"; const vector<string>& fields = query.getProj()->getRequiredFields(); bool covered = true; @@ -563,13 +570,51 @@ namespace mongo { break; } } + QLOG() << "PROJECTION: is covered?: = " << covered << endl; + // If any field is missing from the list of fields the projection wants, // a fetch is required. if (!covered) { FetchNode* fetch = new FetchNode(); fetch->children.push_back(solnRoot); solnRoot = fetch; + + // It's simple but we'll have the full document and we should just iterate + // over that. + projType = ProjectionNode::SIMPLE_DOC; + QLOG() << "PROJECTION: not covered, fetching."; + } + else { + if (solnRoot->fetched()) { + // Fetched implies hasObj() so let's run with that. + projType = ProjectionNode::SIMPLE_DOC; + QLOG() << "PROJECTION: covered via FETCH, using SIMPLE_DOC fast path"; + } + else { + // If we're here we're not fetched so we're covered. Let's see if we can + // get out of using the default projType. If there's only one leaf + // underneath and it's giving us index data we can use the faster covered + // impl. + vector<QuerySolutionNode*> leafNodes; + getLeafNodes(solnRoot, &leafNodes); + + if (1 == leafNodes.size()) { + // Both the IXSCAN and DISTINCT stages provide covered key data. + if (STAGE_IXSCAN == leafNodes[0]->getType()) { + projType = ProjectionNode::COVERED_ONE_INDEX; + IndexScanNode* ixn = static_cast<IndexScanNode*>(leafNodes[0]); + coveredKeyObj = ixn->indexKeyPattern; + QLOG() << "PROJECTION: covered via IXSCAN, using COVERED fast path"; + } + else if (STAGE_DISTINCT == leafNodes[0]->getType()) { + projType = ProjectionNode::COVERED_ONE_INDEX; + DistinctNode* dn = static_cast<DistinctNode*>(leafNodes[0]); + coveredKeyObj = dn->indexKeyPattern; + QLOG() << "PROJECTION: covered via DISTINCT, using COVERED fast path"; + } + } + } } } @@ -578,6 +623,8 @@ namespace mongo { projNode->children.push_back(solnRoot); projNode->fullExpression = query.root(); projNode->projection = query.getParsed().getProj(); + projNode->projType = projType; + projNode->coveredKeyObj = coveredKeyObj; solnRoot = projNode; } else { diff --git a/src/mongo/db/query/query_solution.cpp b/src/mongo/db/query/query_solution.cpp index 36c416ca5aa..5df771b491d 100644 --- a/src/mongo/db/query/query_solution.cpp +++ b/src/mongo/db/query/query_solution.cpp @@ -561,6 +561,17 @@ namespace mongo { *ss << "PROJ\n"; addIndent(ss, indent + 1); *ss << "proj = " << projection.toString() << '\n'; + addIndent(ss, indent + 1); + if (DEFAULT == projType) { + *ss << "type = DEFAULT\n"; + } + else if (COVERED_ONE_INDEX == projType) { + *ss << "type = COVERED_ONE_INDEX\n"; + } + else { + invariant(SIMPLE_DOC == projType); + *ss << "type = SIMPLE_DOC\n"; + } addCommon(ss, indent); addIndent(ss, indent + 1); *ss << "Child:" << '\n'; diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h index 70efebcc4c5..b5ca922eb1a 100644 --- a/src/mongo/db/query/query_solution.h +++ b/src/mongo/db/query/query_solution.h @@ -419,7 +419,26 @@ namespace mongo { }; struct ProjectionNode : public QuerySolutionNode { - ProjectionNode() { } + /** + * We have a few implementations of the projection functionality. The most general + * implementation 'DEFAULT' is much slower than the fast-path implementations + * below. We only really have all the information available to choose a projection + * implementation at planning time. + */ + enum ProjectionType { + // This is the most general implementation of the projection functionality. It handles + // every case. + DEFAULT, + + // This is a fast-path for when the projection is fully covered by one index. + COVERED_ONE_INDEX, + + // This is a fast-path for when the projection only has inclusions on non-dotted fields. + SIMPLE_DOC, + }; + + ProjectionNode() : fullExpression(NULL), projType(DEFAULT) { } + virtual ~ProjectionNode() { } virtual StageType getType() const { return STAGE_PROJECTION; } @@ -466,6 +485,14 @@ namespace mongo { // Given that we don't yet have a MatchExpression analogue for the expression language, we // use a BSONObj. BSONObj projection; + + // What implementation of the projection algorithm should we use? + ProjectionType projType; + + // Only meaningful if projType == COVERED_ONE_INDEX. This is the key pattern of the index + // supplying our covered data. We can pre-compute which fields to include and cache that + // data for later if we know we only have one index. + BSONObj coveredKeyObj; }; struct SortNode : public QuerySolutionNode { @@ -676,7 +703,7 @@ namespace mongo { // This stage is created "on top" of normal planning and as such the properties // below don't really matter. - bool fetched() const { return true; } + bool fetched() const { return false; } bool hasField(const string& field) const { return !indexKeyPattern[field].eoo(); } bool sortedByDiskLoc() const { return false; } const BSONObjSet& getSort() const { return sorts; } diff --git a/src/mongo/db/query/stage_builder.cpp b/src/mongo/db/query/stage_builder.cpp index d7f7099d508..697dfbbc227 100644 --- a/src/mongo/db/query/stage_builder.cpp +++ b/src/mongo/db/query/stage_builder.cpp @@ -109,7 +109,25 @@ namespace mongo { const ProjectionNode* pn = static_cast<const ProjectionNode*>(root); PlanStage* childStage = buildStages(qsol, pn->children[0], ws); if (NULL == childStage) { return NULL; } - return new ProjectionStage(pn->projection, pn->fullExpression, ws, childStage); + ProjectionStageParams params; + params.projObj = pn->projection; + + // Stuff the right data into the params depending on what proj impl we use. + if (ProjectionNode::DEFAULT == pn->projType) { + params.fullExpression = pn->fullExpression; + params.projImpl = ProjectionStageParams::NO_FAST_PATH; + } + else if (ProjectionNode::COVERED_ONE_INDEX == pn->projType) { + params.projImpl = ProjectionStageParams::COVERED_ONE_INDEX; + params.coveredKeyObj = pn->coveredKeyObj; + invariant(!pn->coveredKeyObj.isEmpty()); + } + else { + invariant(ProjectionNode::SIMPLE_DOC == pn->projType); + params.projImpl = ProjectionStageParams::SIMPLE_DOC; + } + + return new ProjectionStage(params, ws, childStage); } else if (STAGE_LIMIT == root->getType()) { const LimitNode* ln = static_cast<const LimitNode*>(root); |