From 5dfc754e10bc411b0d23b6ec1bdadfe3c72ad91c Mon Sep 17 00:00:00 2001 From: David Storch Date: Mon, 28 Apr 2014 16:39:33 -0400 Subject: SERVER-13337 support queries with projections in the idhack runner (cherry picked from commit 10d8b3d7984a4abf0d8f80562d426d2f5f0707c3) Conflicts: src/mongo/db/query/idhack_runner.cpp --- jstests/core/idhack.js | 40 ++++++++++++--- src/mongo/db/exec/projection.cpp | 79 ++++++++++++++++------------ src/mongo/db/exec/projection.h | 20 ++++++++ src/mongo/db/exec/projection_exec.cpp | 10 +++- src/mongo/db/exec/projection_exec.h | 1 - src/mongo/db/query/idhack_runner.cpp | 97 ++++++++++++++++++++++------------- src/mongo/db/query/idhack_runner.h | 12 ++++- 7 files changed, 180 insertions(+), 79 deletions(-) diff --git a/jstests/core/idhack.js b/jstests/core/idhack.js index 311c4ebc935..11fb1e0f447 100644 --- a/jstests/core/idhack.js +++ b/jstests/core/idhack.js @@ -46,14 +46,40 @@ var skipExplain = t.find( query ).skip(1).explain(); print( "explain for skip query = " + tojson( skipExplain ) ); assert.neq( explain.cursor, skipExplain.cursor, "F1" ); -// Only acceptable projection for ID hack is {_id: 1}. -var projectionExplain = t.find( query, { _id : 0, z : 1 } ).explain(); -print( "explain for projection query = " + tojson( projectionExplain ) ); -assert.neq( explain.cursor, projectionExplain.cursor, "G1" ); - // Covered query returning _id field only can be handled by ID hack. var coveredExplain = t.find( query, { _id : 1 } ).explain(); print( "explain for covered query = " + tojson( coveredExplain ) ); -assert.eq( explain.cursor, coveredExplain.cursor, "H1" ); +assert.eq( explain.cursor, coveredExplain.cursor, "G1" ); // Check doc from covered ID hack query. -assert.eq( { _id : { x: 2 } }, t.findOne( query, { _id : 1 } ), "H2" ); +assert.eq( { _id : { x: 2 } }, t.findOne( query, { _id : 1 } ), "G2" ); + +// +// Non-covered projection for idhack. 
+// + +t.drop(); +t.insert( { _id: 0, a: 0, b: [ { c: 1 }, { c: 2 } ] }); +t.insert( { _id: 1, a: 1, b: [ { c: 3 }, { c: 4 } ] }); + +// Simple inclusion. +assert.eq( { _id: 1, a: 1 }, t.find( { _id: 1 }, { a: 1 } ).next() ); +assert.eq( { a: 1 }, t.find({ _id: 1 }, { _id: 0, a: 1 } ).next() ); +assert.eq( { _id: 0, a: 0 }, t.find( { _id: 0 }, { _id: 1, a: 1 } ).next() ); + +// Non-simple: exclusion. +assert.eq( { _id: 1, a: 1 }, t.find( { _id: 1 }, { b: 0 } ).next() ); +assert.eq( { _id: 0, }, t.find( { _id: 0 }, { a: 0, b: 0 } ).next() ); + +// Non-simple: dotted fields. +assert.eq( { b: [ { c: 1 }, { c: 2 } ] }, t.find( { _id: 0 }, { _id: 0, "b.c": 1 } ).next() ); +assert.eq( { _id: 1 }, t.find( { _id: 1 }, { "foo.bar": 1 } ).next() ); + +// Non-simple: elemMatch projection. +assert.eq( { _id: 1, b: [ { c: 4 } ] }, + t.find( { _id: 1 }, { b: { $elemMatch: { c: 4 } } } ).next() ); + +// Non-simple: $returnKey. +assert.eq( { _id: 1 }, t.find( { _id: 1 } )._addSpecial( "$returnKey", true ).next() ); + +// Non-simple: $returnKey overrides other projections. +assert.eq( { _id: 1 }, t.find( { _id: 1 }, { a: 1 } )._addSpecial( "$returnKey", true ).next() ); diff --git a/src/mongo/db/exec/projection.cpp b/src/mongo/db/exec/projection.cpp index 06acc8b789a..689d828b4b7 100644 --- a/src/mongo/db/exec/projection.cpp +++ b/src/mongo/db/exec/projection.cpp @@ -59,27 +59,8 @@ namespace mongo { invariant(_projObj.isOwned()); invariant(!_projObj.isEmpty()); - // The _id is included by default. - bool includeId = true; - - // Figure out what fields are in the projection. TODO: we can get this from the - // ParsedProjection...modify that to have this type instead of a vector. - BSONObjIterator projObjIt(_projObj); - while (projObjIt.more()) { - BSONElement elt = projObjIt.next(); - // Must deal with the _id case separately as there is an implicit _id: 1 in the - // projection. 
- if (mongoutils::str::equals(elt.fieldName(), kIdField) - && !elt.trueValue()) { - includeId = false; - continue; - } - _includedFields.insert(elt.fieldNameStringData()); - } - - if (includeId) { - _includedFields.insert(kIdField); - } + // Figure out what fields are in the projection. + getSimpleInclusionFields(_projObj, &_includedFields); // If we're pulling data out of one index we can pre-compute the indices of the fields // in the key that we pull data from and avoid looking up the field name each time. @@ -113,6 +94,49 @@ namespace mongo { } } + // static + void ProjectionStage::getSimpleInclusionFields(const BSONObj& projObj, + FieldSet* includedFields) { + // The _id is included by default. + bool includeId = true; + + // Figure out what fields are in the projection. TODO: we can get this from the + // ParsedProjection...modify that to have this type instead of a vector. + BSONObjIterator projObjIt(projObj); + while (projObjIt.more()) { + BSONElement elt = projObjIt.next(); + // Must deal with the _id case separately as there is an implicit _id: 1 in the + // projection. + if (mongoutils::str::equals(elt.fieldName(), kIdField) + && !elt.trueValue()) { + includeId = false; + continue; + } + includedFields->insert(elt.fieldNameStringData()); + } + + if (includeId) { + includedFields->insert(kIdField); + } + } + + // static + void ProjectionStage::transformSimpleInclusion(const BSONObj& in, + const FieldSet& includedFields, + BSONObjBuilder& bob) { + // Look at every field in the source document and see if we're including it. + BSONObjIterator inputIt(in); + while (inputIt.more()) { + BSONElement elt = inputIt.next(); + unordered_set::const_iterator fieldIt; + fieldIt = includedFields.find(elt.fieldNameStringData()); + if (includedFields.end() != fieldIt) { + // If so, add it to the builder. + bob.append(elt); + } + } + } + Status ProjectionStage::transform(WorkingSetMember* member) { // The default no-fast-path case. 
if (ProjectionStageParams::NO_FAST_PATH == _projImpl) { @@ -131,17 +155,8 @@ namespace mongo { // If we got here because of SIMPLE_DOC the planner shouldn't have messed up. invariant(member->hasObj()); - // Look at every field in the source document and see if we're including it. - BSONObjIterator inputIt(member->obj); - while (inputIt.more()) { - BSONElement elt = inputIt.next(); - unordered_set::iterator fieldIt; - fieldIt = _includedFields.find(elt.fieldNameStringData()); - if (_includedFields.end() != fieldIt) { - // If so, add it to the builder. - bob.append(elt); - } - } + // Apply the SIMPLE_DOC projection. + transformSimpleInclusion(member->obj, _includedFields, bob); } else { invariant(ProjectionStageParams::COVERED_ONE_INDEX == _projImpl); diff --git a/src/mongo/db/exec/projection.h b/src/mongo/db/exec/projection.h index 14420fccca0..e93ee682170 100644 --- a/src/mongo/db/exec/projection.h +++ b/src/mongo/db/exec/projection.h @@ -85,6 +85,26 @@ namespace mongo { PlanStageStats* getStats(); + typedef unordered_set FieldSet; + + /** + * Given the projection spec for a simple inclusion projection, + * 'projObj', populates 'includedFields' with the set of field + * names to be included. + */ + static void getSimpleInclusionFields(const BSONObj& projObj, + FieldSet* includedFields); + + /** + * Applies a simple inclusion projection to 'in', including + * only the fields specified by 'includedFields'. + * + * The resulting document is constructed using 'bob'. 
+ */ + static void transformSimpleInclusion(const BSONObj& in, + const FieldSet& includedFields, + BSONObjBuilder& bob); + private: Status transform(WorkingSetMember* member); diff --git a/src/mongo/db/exec/projection_exec.cpp b/src/mongo/db/exec/projection_exec.cpp index afa60132631..27eed3f90d4 100644 --- a/src/mongo/db/exec/projection_exec.cpp +++ b/src/mongo/db/exec/projection_exec.cpp @@ -347,8 +347,16 @@ namespace mongo { } Status ProjectionExec::transform(const BSONObj& in, BSONObj* out) const { + // If it's a positional projection we need a MatchDetails. + MatchDetails matchDetails; + if (transformRequiresDetails()) { + matchDetails.requestElemMatchKey(); + verify(NULL != _queryExpression); + verify(_queryExpression->matchesBSON(in, &matchDetails)); + } + BSONObjBuilder bob; - Status s = transform(in, &bob, NULL); + Status s = transform(in, &bob, &matchDetails); if (!s.isOK()) { return s; } diff --git a/src/mongo/db/exec/projection_exec.h b/src/mongo/db/exec/projection_exec.h index 796e8c62283..e358c76f686 100644 --- a/src/mongo/db/exec/projection_exec.h +++ b/src/mongo/db/exec/projection_exec.h @@ -76,7 +76,6 @@ namespace mongo { /** * Apply this projection to the object 'in'. - * 'this' must be a simple inclusion/exclusion projection. * * Upon success, 'out' is set to the new object and Status::OK() is returned. * Otherwise, returns an error Status and *out is not mutated. 
diff --git a/src/mongo/db/query/idhack_runner.cpp b/src/mongo/db/query/idhack_runner.cpp index 2ac97e02aef..7fcea5c9267 100644 --- a/src/mongo/db/query/idhack_runner.cpp +++ b/src/mongo/db/query/idhack_runner.cpp @@ -32,6 +32,7 @@ #include "mongo/db/structure/btree/btree.h" #include "mongo/db/catalog/index_catalog.h" #include "mongo/db/diskloc.h" +#include "mongo/db/exec/projection.h" #include "mongo/db/index/btree_access_method.h" #include "mongo/db/index/index_descriptor.h" #include "mongo/db/jsobj.h" @@ -42,30 +43,6 @@ #include "mongo/db/catalog/collection.h" #include "mongo/s/d_logic.h" -namespace { - - using namespace mongo; - - /** - * Does the query contain a projection on {_id: 1}? - */ - bool hasIDProjection(const CanonicalQuery* query) { - // We don't know the answer if the query is NULL. - if (!query) { - return false; - } - // No projection means not covered. - if (!query->getProj()) { - return false; - } - // Since the only supported projection is {_id: 1}, - // a valid ParsedProjection is enough to indicate that - // we have a covered query. - return true; - } - -} // namespace - namespace mongo { IDHackRunner::IDHackRunner(const Collection* collection, CanonicalQuery* query) @@ -119,7 +96,7 @@ namespace mongo { if (NULL == objOut) { // No object requested - nothing to do. } - else if (hasIDProjection(_query.get())) { + else if (hasCoveredProjection()) { // Covered query on _id field only. // Set object to search key. // Search key is retrieved from the canonical query at @@ -130,8 +107,6 @@ namespace mongo { *objOut = _key.getOwned(); } else { - invariant(!hasIDProjection(_query.get())); - // Fetch object from storage. Record* record = loc.rec(); @@ -155,8 +130,12 @@ namespace mongo { } } - // Either the data was in memory or we paged it in. - *objOut = loc.obj(); + // If we're here, either the data was in memory or we paged it in. + + if (!applyProjection(loc, objOut)) { + // No projection. Just return the object inside the diskloc. 
+ *objOut = loc.obj(); + } // If we're sharded make sure the key belongs to us. We need the object to do this. if (shardingState.needCollectionMetadata(_collection->ns().ns())) { @@ -181,6 +160,48 @@ namespace mongo { return Runner::RUNNER_ADVANCED; } + bool IDHackRunner::applyProjection(const DiskLoc& loc, BSONObj* objOut) const { + if (NULL == _query.get() || NULL == _query->getProj()) { + // This idhack query does not have a projection. + return false; + } + + const BSONObj& docAtLoc = loc.obj(); + + // We have a non-covered projection (covered projections should be handled earlier, + // in getNext(..)). For simple inclusion projections we use a fast path similar to that + // implemented in the ProjectionStage. For non-simple inclusion projections we fall back + // to ProjectionExec. + const BSONObj& projObj = _query->getParsed().getProj(); + + if (_query->getProj()->wantIndexKey()) { + // $returnKey is specified. This overrides everything else. + BSONObjBuilder bob; + const BSONObj& queryObj = _query->getParsed().getFilter(); + bob.append(queryObj["_id"]); + *objOut = bob.obj(); + } + else if (_query->getProj()->requiresDocument() || _query->getProj()->wantIndexKey()) { + // Not a simple projection, so fall back on the regular projection path. + ProjectionExec projExec(projObj, _query->root()); + projExec.transform(docAtLoc, objOut); + } + else { + // This is a simple inclusion projection. Start by getting the set + // of fields to include. + unordered_set includedFields; + ProjectionStage::getSimpleInclusionFields(projObj, &includedFields); + + // Apply the simple inclusion projection. 
+ BSONObjBuilder bob; + ProjectionStage::transformSimpleInclusion(docAtLoc, includedFields, bob); + + *objOut = bob.obj(); + } + + return true; + } + bool IDHackRunner::isEOF() { return _killed || _done; } @@ -227,8 +248,8 @@ namespace mongo { BSONElement keyElt = _key.firstElement(); BSONObj indexBounds = BSON("_id" << BSON_ARRAY( BSON_ARRAY( keyElt << keyElt ) ) ); (*explain)->setIndexBounds(indexBounds); - // Covered projection is only one supported. - (*explain)->setIndexOnly(hasIDProjection(_query.get())); + // ID hack queries are only considered covered if they have the projection {_id: 1}. + (*explain)->setIndexOnly(hasCoveredProjection()); } else if (NULL != planInfo) { *planInfo = new PlanInfo(); @@ -243,18 +264,22 @@ namespace mongo { return !query.getParsed().showDiskLoc() && query.getParsed().getHint().isEmpty() && 0 == query.getParsed().getSkip() - && canUseProjection(query) && CanonicalQuery::isSimpleIdQuery(query.getParsed().getFilter()) && !query.getParsed().hasOption(QueryOption_CursorTailable); } // static - bool IDHackRunner::canUseProjection(const CanonicalQuery& query) { - const ParsedProjection* proj = query.getProj(); + bool IDHackRunner::hasCoveredProjection() const { + // Some update operations use the IDHackRunner without creating a + // canonical query. In this case, _query will be NULL. Just return + // false, as we won't have to do any projection handling for updates. + if (NULL == _query.get()) { + return false; + } - // No projection is OK - ID Hack will fetch entire document. 
+ const ParsedProjection* proj = _query->getProj(); if (!proj) { - return true; + return false; } // If there is a projection, it has to be a covered projection on diff --git a/src/mongo/db/query/idhack_runner.h b/src/mongo/db/query/idhack_runner.h index 6d3f6aedb6b..fce573ee6e5 100644 --- a/src/mongo/db/query/idhack_runner.h +++ b/src/mongo/db/query/idhack_runner.h @@ -85,9 +85,17 @@ namespace mongo { private: /** - * ID Hack will work with only one projection: {_id: 1}. + * ID Hack queries are only covered with the projection {_id: 1}. */ - static bool canUseProjection(const CanonicalQuery& query); + bool hasCoveredProjection() const; + + /** + * If '_query' has a projection, then apply it, returning the result in 'objOut'. + * The diskloc 'loc' contains the BSONObj to transform. + * + * Otherwise do nothing and return false. + */ + bool applyProjection(const DiskLoc& loc, BSONObj* objOut) const; // Not owned here. const Collection* _collection; -- cgit v1.2.1