diff options
author | Charlie Swanson <charlie.swanson@mongodb.com> | 2016-06-21 13:47:18 -0400 |
---|---|---|
committer | Charlie Swanson <charlie.swanson@mongodb.com> | 2016-06-24 18:07:36 -0400 |
commit | 752eb0f448987b3e4183beacc622f4e95e7803be (patch) | |
tree | 525ab64b7e0ae8a5e4fba3da7feefd34e2600581 /src/mongo | |
parent | 7e3ce9cec306df5abb15c0c1a599023270cfb807 (diff) | |
download | mongo-752eb0f448987b3e4183beacc622f4e95e7803be.tar.gz |
SERVER-18966 Add the ability to exclude fields in $project stage
Diffstat (limited to 'src/mongo')
27 files changed, 3386 insertions, 2324 deletions
diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript index 32c0f82eae8..fb7d18591a1 100644 --- a/src/mongo/db/pipeline/SConscript +++ b/src/mongo/db/pipeline/SConscript @@ -181,6 +181,7 @@ docSourceEnv.Library( 'document_value', 'expression', 'expression_context', + 'parsed_aggregation_projection', '$BUILD_DIR/mongo/client/clientdriver', '$BUILD_DIR/mongo/db/bson/dotted_path_support', '$BUILD_DIR/mongo/db/matcher/expressions', @@ -292,3 +293,41 @@ env.CppUnitTest( '$BUILD_DIR/mongo/base', ] ) + +env.Library( + target='parsed_aggregation_projection', + source=[ + 'parsed_aggregation_projection.cpp', + 'parsed_exclusion_projection.cpp', + 'parsed_inclusion_projection.cpp', + ], + LIBDEPS=[ + 'expression', + 'field_path', + '$BUILD_DIR/mongo/db/matcher/expression_algo', + ] +) + +env.CppUnitTest( + target='parsed_exclusion_projection_test', + source='parsed_exclusion_projection_test.cpp', + LIBDEPS=[ + 'parsed_aggregation_projection', + ], +) + +env.CppUnitTest( + target='parsed_aggregation_projection_test', + source='parsed_aggregation_projection_test.cpp', + LIBDEPS=[ + 'parsed_aggregation_projection', + ], +) + +env.CppUnitTest( + target='parsed_inclusion_projection_test', + source='parsed_inclusion_projection_test.cpp', + LIBDEPS=[ + 'parsed_aggregation_projection', + ], +) diff --git a/src/mongo/db/pipeline/document_source.h b/src/mongo/db/pipeline/document_source.h index df3f4eecaf5..3ab47c559fd 100644 --- a/src/mongo/db/pipeline/document_source.h +++ b/src/mongo/db/pipeline/document_source.h @@ -49,6 +49,7 @@ #include "mongo/db/pipeline/expression.h" #include "mongo/db/pipeline/expression_context.h" #include "mongo/db/pipeline/lookup_set_cache.h" +#include "mongo/db/pipeline/parsed_aggregation_projection.h" #include "mongo/db/pipeline/pipeline.h" #include "mongo/db/pipeline/value.h" #include "mongo/db/query/plan_summary_stats.h" @@ -977,46 +978,41 @@ private: class DocumentSourceProject final : public DocumentSource { public: - // 
virtuals from DocumentSource boost::optional<Document> getNext() final; const char* getSourceName() const final; + Value serialize(bool explain = false) const final; + void dispose() final; + + /** + * Adds any paths that are included via this projection, or that are referenced by any + * expressions. + */ + GetDepsReturn getDependencies(DepsTracker* deps) const final; + /** * Attempt to move a subsequent $skip or $limit stage before the $project, thus reducing the * number of documents that pass through this stage. */ Pipeline::SourceContainer::iterator optimizeAt(Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) final; - boost::intrusive_ptr<DocumentSource> optimize() final; - Value serialize(bool explain = false) const final; - - virtual GetDepsReturn getDependencies(DepsTracker* deps) const; /** - Create a new projection DocumentSource from BSON. - - This is a convenience for directly handling BSON, and relies on the - above methods. + * Optimize any expressions being used in this stage. + */ + boost::intrusive_ptr<DocumentSource> optimize() final; - @param pBsonElement the BSONElement with an object named $project - @param pExpCtx the expression context for the pipeline - @returns the created projection + /** + * Parse the projection from the user-supplied BSON. 
*/ static boost::intrusive_ptr<DocumentSource> createFromBson( BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& pExpCtx); - /** projection as specified by the user */ - BSONObj getRaw() const { - return _raw; - } - private: - DocumentSourceProject(const boost::intrusive_ptr<ExpressionContext>& pExpCtx, - const boost::intrusive_ptr<ExpressionObject>& exprObj); + DocumentSourceProject( + const boost::intrusive_ptr<ExpressionContext>& expCtx, + std::unique_ptr<parsed_aggregation_projection::ParsedAggregationProjection> parsedProject); - // configuration state - std::unique_ptr<Variables> _variables; - boost::intrusive_ptr<ExpressionObject> pEO; - BSONObj _raw; + std::unique_ptr<parsed_aggregation_projection::ParsedAggregationProjection> _parsedProject; }; class DocumentSourceRedact final : public DocumentSource { @@ -1446,7 +1442,7 @@ public: const boost::optional<std::string>& includeArrayIndex); std::string getUnwindPath() const { - return _unwindPath.getPath(false); + return _unwindPath.fullPath(); } bool preserveNullAndEmptyArrays() const { @@ -1495,7 +1491,7 @@ public: } Value serialize(bool explain = false) const final; BSONObjSet getOutputSorts() final { - return {BSON(distanceField->getPath(false) << -1)}; + return {BSON(distanceField->fullPath() << -1)}; } // Virtuals for SplittableDocumentSource @@ -1564,7 +1560,7 @@ public: void dispose() final; BSONObjSet getOutputSorts() final { - return DocumentSource::truncateSortSet(pSource->getOutputSorts(), {_as.getPath(false)}); + return DocumentSource::truncateSortSet(pSource->getOutputSorts(), {_as.fullPath()}); } bool needsPrimaryShard() const final { @@ -1639,7 +1635,7 @@ public: Pipeline::SourceContainer* container) final; GetDepsReturn getDependencies(DepsTracker* deps) const final { - _startWith->addDependencies(deps, nullptr); + _startWith->addDependencies(deps); return SEE_NEXT; }; diff --git a/src/mongo/db/pipeline/document_source_facet.cpp b/src/mongo/db/pipeline/document_source_facet.cpp 
index 72a69ade0db..3d4ff056554 100644 --- a/src/mongo/db/pipeline/document_source_facet.cpp +++ b/src/mongo/db/pipeline/document_source_facet.cpp @@ -154,7 +154,7 @@ intrusive_ptr<DocumentSource> DocumentSourceFacet::createFromBson( StringMap<intrusive_ptr<Pipeline>> facetPipelines; for (auto&& facetElem : elem.embeddedObject()) { const auto facetName = facetElem.fieldNameStringData(); - FieldPath::uassertValidFieldName(facetName.toString()); + FieldPath::uassertValidFieldName(facetName); uassert(40170, str::stream() << "arguments to $facet must be arrays, " << facetName << " is type " << typeName(facetElem.type()), diff --git a/src/mongo/db/pipeline/document_source_geo_near.cpp b/src/mongo/db/pipeline/document_source_geo_near.cpp index 968958e3e52..5f4e6ae0552 100644 --- a/src/mongo/db/pipeline/document_source_geo_near.cpp +++ b/src/mongo/db/pipeline/document_source_geo_near.cpp @@ -88,7 +88,7 @@ intrusive_ptr<DocumentSource> DocumentSourceGeoNear::getShardSource() { return this; } intrusive_ptr<DocumentSource> DocumentSourceGeoNear::getMergeSource() { - return DocumentSourceSort::create(pExpCtx, BSON(distanceField->getPath(false) << 1), limit); + return DocumentSourceSort::create(pExpCtx, BSON(distanceField->fullPath() << 1), limit); } Value DocumentSourceGeoNear::serialize(bool explain) const { @@ -101,7 +101,7 @@ Value DocumentSourceGeoNear::serialize(bool explain) const { } // not in buildGeoNearCmd - result.setField("distanceField", Value(distanceField->getPath(false))); + result.setField("distanceField", Value(distanceField->fullPath())); result.setField("limit", Value(limit)); @@ -116,7 +116,7 @@ Value DocumentSourceGeoNear::serialize(bool explain) const { result.setField("distanceMultiplier", Value(distanceMultiplier)); if (includeLocs) - result.setField("includeLocs", Value(includeLocs->getPath(false))); + result.setField("includeLocs", Value(includeLocs->fullPath())); return Value(DOC(getSourceName() << result.freeze())); } diff --git 
a/src/mongo/db/pipeline/document_source_graph_lookup.cpp b/src/mongo/db/pipeline/document_source_graph_lookup.cpp index e64c6b37039..e42ad4a4c73 100644 --- a/src/mongo/db/pipeline/document_source_graph_lookup.cpp +++ b/src/mongo/db/pipeline/document_source_graph_lookup.cpp @@ -210,7 +210,7 @@ bool DocumentSourceGraphLookUp::addToVisitedAndFrontier(BSONObj result, long lon // We have not seen this node before. If '_depthField' was specified, add the field to the // object. BSONObj fullObject = - _depthField ? addDepthFieldToObject(_depthField->getPath(false), depth, result) : result; + _depthField ? addDepthFieldToObject(_depthField->fullPath(), depth, result) : result; // Add the object to our '_visited' list. _visited[_id] = fullObject; @@ -223,7 +223,7 @@ bool DocumentSourceGraphLookUp::addToVisitedAndFrontier(BSONObj result, long lon // array, we treat it as connecting to multiple values, so we must add each element to // '_frontier'. BSONElementSet recurseOnValues; - dps::extractAllElementsAlongPath(result, _connectFromField.getPath(false), recurseOnValues); + dps::extractAllElementsAlongPath(result, _connectFromField.fullPath(), recurseOnValues); for (auto&& elem : recurseOnValues) { Value recurseOn = Value(elem); @@ -246,7 +246,7 @@ bool DocumentSourceGraphLookUp::addToVisitedAndFrontier(BSONObj result, long lon void DocumentSourceGraphLookUp::addToCache(const BSONObj& result, const unordered_set<Value, Value::Hash>& queried) { BSONElementSet cacheByValues; - dps::extractAllElementsAlongPath(result, _connectToField.getPath(false), cacheByValues); + dps::extractAllElementsAlongPath(result, _connectToField.fullPath(), cacheByValues); for (auto&& elem : cacheByValues) { Value cacheBy(elem); @@ -288,7 +288,7 @@ boost::optional<BSONObj> DocumentSourceGraphLookUp::constructQuery(BSONObjSet* c // Create a query of the form {_connectToField: {$in: [...]}}. 
BSONObjBuilder query; - BSONObjBuilder subobj(query.subobjStart(_connectToField.getPath(false))); + BSONObjBuilder subobj(query.subobjStart(_connectToField.fullPath())); BSONArrayBuilder in(subobj.subarrayStart("$in")); for (auto&& value : _frontier) { @@ -330,7 +330,7 @@ Pipeline::SourceContainer::iterator DocumentSourceGraphLookUp::optimizeAt( // If we are not already handling an $unwind stage internally, we can combine with the following // $unwind stage. auto nextUnwind = dynamic_cast<DocumentSourceUnwind*>((*std::next(itr)).get()); - if (nextUnwind && !_unwind && nextUnwind->getUnwindPath() == _as.getPath(false)) { + if (nextUnwind && !_unwind && nextUnwind->getUnwindPath() == _as.fullPath()) { _unwind = std::move(nextUnwind); container->erase(std::next(itr)); return itr; @@ -339,12 +339,12 @@ Pipeline::SourceContainer::iterator DocumentSourceGraphLookUp::optimizeAt( } BSONObjSet DocumentSourceGraphLookUp::getOutputSorts() { - std::set<std::string> fields{_as.getPath(false)}; + std::set<std::string> fields{_as.fullPath()}; if (_depthField) { - fields.insert(_depthField->getPath(false)); + fields.insert(_depthField->fullPath()); } if (_unwind && (*_unwind)->indexPath()) { - fields.insert((*_unwind)->indexPath()->getPath(false)); + fields.insert((*_unwind)->indexPath()->fullPath()); } return DocumentSource::truncateSortSet(pSource->getOutputSorts(), fields); @@ -360,17 +360,16 @@ void DocumentSourceGraphLookUp::checkMemoryUsage() { void DocumentSourceGraphLookUp::serializeToArray(std::vector<Value>& array, bool explain) const { // Serialize default options. 
- MutableDocument spec(DOC("from" << _from.coll() << "as" << _as.getPath(false) - << "connectToField" - << _connectToField.getPath(false) + MutableDocument spec(DOC("from" << _from.coll() << "as" << _as.fullPath() << "connectToField" + << _connectToField.fullPath() << "connectFromField" - << _connectFromField.getPath(false) + << _connectFromField.fullPath() << "startWith" << _startWith->serialize(false))); // depthField is optional; serialize it if it was specified. if (_depthField) { - spec["depthField"] = Value(_depthField->getPath(false)); + spec["depthField"] = Value(_depthField->fullPath()); } if (_maxDepth) { @@ -380,11 +379,10 @@ void DocumentSourceGraphLookUp::serializeToArray(std::vector<Value>& array, bool // If we are explaining, include an absorbed $unwind inside the $graphLookup specification. if (_unwind && explain) { const boost::optional<FieldPath> indexPath = (*_unwind)->indexPath(); - spec["unwinding"] = - Value(DOC("preserveNullAndEmptyArrays" - << (*_unwind)->preserveNullAndEmptyArrays() - << "includeArrayIndex" - << (indexPath ? Value((*indexPath).getPath(false)) : Value()))); + spec["unwinding"] = Value(DOC("preserveNullAndEmptyArrays" + << (*_unwind)->preserveNullAndEmptyArrays() + << "includeArrayIndex" + << (indexPath ? 
Value((*indexPath).fullPath()) : Value()))); } array.push_back(Value(DOC(getSourceName() << spec.freeze()))); diff --git a/src/mongo/db/pipeline/document_source_group.cpp b/src/mongo/db/pipeline/document_source_group.cpp index 57209147a0e..b7a59ddde64 100644 --- a/src/mongo/db/pipeline/document_source_group.cpp +++ b/src/mongo/db/pipeline/document_source_group.cpp @@ -313,8 +313,7 @@ intrusive_ptr<DocumentSource> DocumentSourceGroup::createFromBson( intrusive_ptr<Expression> pGroupExpr; BSONType elementType = subElement.type(); if (elementType == Object) { - Expression::ObjectCtx oCtx(Expression::ObjectCtx::DOCUMENT_OK); - pGroupExpr = Expression::parseObject(subElement.Obj(), &oCtx, vps); + pGroupExpr = Expression::parseObject(subElement.Obj(), vps); } else if (elementType == Array) { uasserted(15953, str::stream() << "aggregating group operators are unary (" << name @@ -396,7 +395,7 @@ void getFieldPathMap(ExpressionObject* expressionObj, if (expObj) { getFieldPathMap(expObj, newPrefix, fields); } else if (expPath) { - (*fields)[expPath->getFieldPath().tail().getPath(false)] = newPrefix; + (*fields)[expPath->getFieldPath().tail().fullPath()] = newPrefix; } } } @@ -742,8 +741,7 @@ void DocumentSourceGroup::parseIdExpression(BSONElement groupField, const BSONObj idKeyObj = groupField.Obj(); if (idKeyObj.firstElementFieldName()[0] == '$') { // grouping on a $op expression - Expression::ObjectCtx oCtx(0); - _idExpressions.push_back(Expression::parseObject(idKeyObj, &oCtx, vps)); + _idExpressions.push_back(Expression::parseObject(idKeyObj, vps)); } else { // grouping on an "artificial" object. Rather than create the object for each input // in initialize(), instead group on the output of the raw expressions. 
The artificial diff --git a/src/mongo/db/pipeline/document_source_lookup.cpp b/src/mongo/db/pipeline/document_source_lookup.cpp index 2ea55bb4cbb..5be6239cb45 100644 --- a/src/mongo/db/pipeline/document_source_lookup.cpp +++ b/src/mongo/db/pipeline/document_source_lookup.cpp @@ -94,7 +94,7 @@ boost::optional<Document> DocumentSourceLookUp::getNext() { // We have internalized a $match, but have not yet computed the descended $match that should // be applied to our queries. _additionalFilter = DocumentSourceMatch::descendMatchOnPath( - _matchSrc->getMatchExpression(), _as.getPath(false), pExpCtx) + _matchSrc->getMatchExpression(), _as.fullPath(), pExpCtx) ->getQuery(); } @@ -139,7 +139,7 @@ Pipeline::SourceContainer::iterator DocumentSourceLookUp::optimizeAt( // If we are not already handling an $unwind stage internally, we can combine with the // following $unwind stage. - if (nextUnwind && !_handlingUnwind && nextUnwind->getUnwindPath() == _as.getPath(false)) { + if (nextUnwind && !_handlingUnwind && nextUnwind->getUnwindPath() == _as.fullPath()) { _unwindSrc = std::move(nextUnwind); _handlingUnwind = true; container->erase(std::next(itr)); @@ -154,11 +154,11 @@ Pipeline::SourceContainer::iterator DocumentSourceLookUp::optimizeAt( // Attempt to move part of the $match before ourselves, and internalize any predicates upon the // "_as" field. - std::string outputPath = _as.getPath(false); + std::string outputPath = _as.fullPath(); std::set<std::string> fields = {outputPath}; if (_handlingUnwind && _unwindSrc->indexPath()) { - fields.insert((*_unwindSrc->indexPath()).getPath(false)); + fields.insert((*_unwindSrc->indexPath()).fullPath()); } // Attempt to split the $match, putting the independent portion before ourselves. 
@@ -374,12 +374,11 @@ boost::optional<Document> DocumentSourceLookUp::unwindResult() { } void DocumentSourceLookUp::serializeToArray(std::vector<Value>& array, bool explain) const { - MutableDocument output( - DOC(getSourceName() << DOC("from" << _fromNs.coll() << "as" << _as.getPath(false) - << "localField" - << _localField.getPath(false) - << "foreignField" - << _foreignField.getPath(false)))); + MutableDocument output(DOC( + getSourceName() << DOC("from" << _fromNs.coll() << "as" << _as.fullPath() << "localField" + << _localField.fullPath() + << "foreignField" + << _foreignField.fullPath()))); if (explain) { if (_handlingUnwind) { const boost::optional<FieldPath> indexPath = _unwindSrc->indexPath(); @@ -387,7 +386,7 @@ void DocumentSourceLookUp::serializeToArray(std::vector<Value>& array, bool expl Value(DOC("preserveNullAndEmptyArrays" << _unwindSrc->preserveNullAndEmptyArrays() << "includeArrayIndex" - << (indexPath ? Value(indexPath->getPath(false)) : Value()))); + << (indexPath ? Value(indexPath->fullPath()) : Value()))); } if (_matchSrc) { @@ -395,7 +394,7 @@ void DocumentSourceLookUp::serializeToArray(std::vector<Value>& array, bool expl // descended match expression. 
output[getSourceName()]["matching"] = Value(DocumentSourceMatch::descendMatchOnPath( - _matchSrc->getMatchExpression(), _as.getPath(false), pExpCtx) + _matchSrc->getMatchExpression(), _as.fullPath(), pExpCtx) ->getQuery()); } @@ -417,7 +416,7 @@ void DocumentSourceLookUp::serializeToArray(std::vector<Value>& array, bool expl } DocumentSource::GetDepsReturn DocumentSourceLookUp::getDependencies(DepsTracker* deps) const { - deps->fields.insert(_localField.getPath(false)); + deps->fields.insert(_localField.fullPath()); return SEE_NEXT; } diff --git a/src/mongo/db/pipeline/document_source_project.cpp b/src/mongo/db/pipeline/document_source_project.cpp index 462621bc79e..f6b70b840bd 100644 --- a/src/mongo/db/pipeline/document_source_project.cpp +++ b/src/mongo/db/pipeline/document_source_project.cpp @@ -28,23 +28,27 @@ #include "mongo/platform/basic.h" +#include "mongo/db/pipeline/document_source.h" + +#include <boost/optional.hpp> #include <boost/smart_ptr/intrusive_ptr.hpp> -#include "mongo/db/jsobj.h" #include "mongo/db/pipeline/document.h" -#include "mongo/db/pipeline/document_source.h" #include "mongo/db/pipeline/expression.h" +#include "mongo/db/pipeline/parsed_aggregation_projection.h" #include "mongo/db/pipeline/value.h" +#include "mongo/stdx/memory.h" namespace mongo { using boost::intrusive_ptr; -using std::string; -using std::vector; +using parsed_aggregation_projection::ParsedAggregationProjection; +using parsed_aggregation_projection::ProjectionType; -DocumentSourceProject::DocumentSourceProject(const intrusive_ptr<ExpressionContext>& pExpCtx, - const intrusive_ptr<ExpressionObject>& exprObj) - : DocumentSource(pExpCtx), pEO(exprObj) {} +DocumentSourceProject::DocumentSourceProject( + const intrusive_ptr<ExpressionContext>& expCtx, + std::unique_ptr<ParsedAggregationProjection> parsedProject) + : DocumentSource(expCtx), _parsedProject(std::move(parsedProject)) {} REGISTER_DOCUMENT_SOURCE(project, DocumentSourceProject::createFromBson); @@ -55,31 +59,16 @@ 
const char* DocumentSourceProject::getSourceName() const { boost::optional<Document> DocumentSourceProject::getNext() { pExpCtx->checkForInterrupt(); - boost::optional<Document> input = pSource->getNext(); - if (!input) + auto input = pSource->getNext(); + if (!input) { return boost::none; + } - /* create the result document */ - const size_t sizeHint = pEO->getSizeHint(); - MutableDocument out(sizeHint); - out.copyMetaDataFrom(*input); - - /* - Use the ExpressionObject to create the base result. - - If we're excluding fields at the top level, leave out the _id if - it is found, because we took care of it above. - */ - _variables->setRoot(*input); - pEO->addToDocument(out, *input, _variables.get()); - _variables->clearRoot(); - - return out.freeze(); + return _parsedProject->applyProjection(*input); } intrusive_ptr<DocumentSource> DocumentSourceProject::optimize() { - intrusive_ptr<Expression> pE(pEO->optimize()); - pEO = boost::dynamic_pointer_cast<ExpressionObject>(pE); + _parsedProject->optimize(); return this; } @@ -99,38 +88,32 @@ Pipeline::SourceContainer::iterator DocumentSourceProject::optimizeAt( return std::next(itr); } +void DocumentSourceProject::dispose() { + _parsedProject.reset(); +} + Value DocumentSourceProject::serialize(bool explain) const { - return Value(DOC(getSourceName() << pEO->serialize(explain))); + return Value(Document{{getSourceName(), _parsedProject->serialize(explain)}}); } intrusive_ptr<DocumentSource> DocumentSourceProject::createFromBson( - BSONElement elem, const intrusive_ptr<ExpressionContext>& pExpCtx) { - /* validate */ + BSONElement elem, const intrusive_ptr<ExpressionContext>& expCtx) { uassert(15969, "$project specification must be an object", elem.type() == Object); - Expression::ObjectCtx objectCtx(Expression::ObjectCtx::DOCUMENT_OK | - Expression::ObjectCtx::TOP_LEVEL | - Expression::ObjectCtx::INCLUSION_OK); - - VariablesIdGenerator idGenerator; - VariablesParseState vps(&idGenerator); - intrusive_ptr<Expression> 
parsed = Expression::parseObject(elem.Obj(), &objectCtx, vps); - ExpressionObject* exprObj = dynamic_cast<ExpressionObject*>(parsed.get()); - massert(16402, "parseObject() returned wrong type of Expression", exprObj); - uassert(16403, "$projection requires at least one output field", exprObj->getFieldCount()); - - intrusive_ptr<DocumentSourceProject> pProject(new DocumentSourceProject(pExpCtx, exprObj)); - pProject->_variables.reset(new Variables(idGenerator.getIdCount())); - - BSONObj projectObj = elem.Obj(); - pProject->_raw = projectObj.getOwned(); - - return pProject; + return new DocumentSourceProject(expCtx, ParsedAggregationProjection::create(elem.Obj())); } DocumentSource::GetDepsReturn DocumentSourceProject::getDependencies(DepsTracker* deps) const { - vector<string> path; // empty == top-level - pEO->addDependencies(deps, &path); - return EXHAUSTIVE_FIELDS; + // Add any fields referenced by the projection. + _parsedProject->addDependencies(deps); + + if (_parsedProject->getType() == ProjectionType::kInclusion) { + // Stop looking for further dependencies later in the pipeline, since anything that is not + // explicitly included or added in this projection will not exist after this stage, so would + // be pointless to include in our dependencies. 
+ return EXHAUSTIVE_FIELDS; + } else { + return SEE_NEXT; + } } } diff --git a/src/mongo/db/pipeline/document_source_sort.cpp b/src/mongo/db/pipeline/document_source_sort.cpp index c9c21043497..a1456d7c70a 100644 --- a/src/mongo/db/pipeline/document_source_sort.cpp +++ b/src/mongo/db/pipeline/document_source_sort.cpp @@ -119,7 +119,7 @@ Document DocumentSourceSort::serializeSortKey(bool explain) const { const FieldPath& withVariable = efp->getFieldPath(); verify(withVariable.getPathLength() > 1); verify(withVariable.getFieldName(0) == "ROOT"); - const string fieldPath = withVariable.tail().getPath(false); + const string fieldPath = withVariable.tail().fullPath(); // append a named integer based on the sort order keyObj.setField(fieldPath, Value(vAscending[i] ? 1 : -1)); diff --git a/src/mongo/db/pipeline/document_source_test.cpp b/src/mongo/db/pipeline/document_source_test.cpp index 58b9732cdae..06c7f4a94db 100644 --- a/src/mongo/db/pipeline/document_source_test.cpp +++ b/src/mongo/db/pipeline/document_source_test.cpp @@ -41,6 +41,7 @@ #include "mongo/dbtests/dbtests.h" #include "mongo/stdx/memory.h" #include "mongo/unittest/temp_dir.h" +#include "mongo/unittest/unittest.h" #include "mongo/util/clock_source_mock.h" #include "mongo/util/tick_source_mock.h" @@ -202,6 +203,9 @@ public: namespace Mock { using mongo::DocumentSourceMock; +/** + * A fixture which provides access to things like a ServiceContext that are needed by other tests. + */ class Base { public: Base() @@ -1349,148 +1353,152 @@ public: namespace DocumentSourceProject { -using mongo::DocumentSourceProject; using mongo::DocumentSourceMock; +using mongo::DocumentSourceProject; -class Base : public Mock::Base { +// +// DocumentSourceProject delegates much of its responsibilities to the ParsedAggregationProjection. +// Most of the functional tests are testing ParsedAggregationProjection directly. These are meant as +// simpler integration tests. 
+// + +/** + * Class which provides useful helpers to test the functionality of the $project stage. + */ +class ProjectStageTest : public Mock::Base, public unittest::Test { protected: - void createProject(const BSONObj& projection = BSON("a" << true)) { + /** + * Creates the $project stage, which can be accessed via project(). + */ + void createProject(const BSONObj& projection) { BSONObj spec = BSON("$project" << projection); BSONElement specElement = spec.firstElement(); _project = DocumentSourceProject::createFromBson(specElement, ctx()); - checkBsonRepresentation(spec); } + DocumentSource* project() { return _project.get(); } - /** Assert that iterator state accessors consistently report the source is exhausted. */ + + /** + * Assert that iterator state accessors consistently report the source is exhausted. + */ void assertExhausted() const { ASSERT(!_project->getNext()); ASSERT(!_project->getNext()); ASSERT(!_project->getNext()); } - /** - * Check that the BSON representation generated by the souce matches the BSON it was - * created with. - */ - void checkBsonRepresentation(const BSONObj& spec) { - vector<Value> arr; - _project->serializeToArray(arr); - BSONObj generatedSpec = arr[0].getDocument().toBson(); - ASSERT_EQUALS(spec, generatedSpec); - } private: intrusive_ptr<DocumentSource> _project; }; -/** The 'a' and 'c.d' fields are included, but the 'b' field is not. */ -class Inclusion : public Base { -public: - void run() { - createProject(BSON("a" << true << "c" << BSON("d" << true))); - auto source = DocumentSourceMock::create("{_id:0,a:1,b:1,c:{d:1}}"); - project()->setSource(source.get()); - // The first result exists and is as expected. - boost::optional<Document> next = project()->getNext(); - ASSERT(bool(next)); - ASSERT_EQUALS(1, next->getField("a").getInt()); - ASSERT(next->getField("b").missing()); - // The _id field is included by default in the root document. - ASSERT_EQUALS(0, next->getField("_id").getInt()); - // The nested c.d inclusion. 
- ASSERT_EQUALS(1, (*next)["c"]["d"].getInt()); - } +TEST_F(ProjectStageTest, InclusionProjectionShouldRemoveUnspecifiedFields) { + createProject(BSON("a" << true << "c" << BSON("d" << true))); + auto source = DocumentSourceMock::create("{_id: 0, a: 1, b: 1, c: {d: 1}}"); + project()->setSource(source.get()); + // The first result exists and is as expected. + boost::optional<Document> next = project()->getNext(); + ASSERT_TRUE(next); + ASSERT_EQUALS(1, next->getField("a").getInt()); + ASSERT(next->getField("b").missing()); + // The _id field is included by default in the root document. + ASSERT_EQUALS(0, next->getField("_id").getInt()); + // The nested c.d inclusion. + ASSERT_EQUALS(1, (*next)["c"]["d"].getInt()); }; -/** Optimize the projection. */ -class Optimize : public Base { -public: - void run() { - createProject(BSON("a" << BSON("$and" << BSON_ARRAY(BSON("$const" << true))))); - project()->optimize(); - // Optimizing the DocumentSourceProject optimizes the Expressions that comprise it, - // in this case replacing an expression depending on constants with a constant. - checkBsonRepresentation(fromjson("{$project:{a:{$const:true}}}")); - } +TEST_F(ProjectStageTest, ShouldOptimizeInnerExpressions) { + createProject(BSON("a" << BSON("$and" << BSON_ARRAY(BSON("$const" << true))))); + project()->optimize(); + // The $and should have been replaced with its only argument. + vector<Value> serializedArray; + project()->serializeToArray(serializedArray); + ASSERT_EQUALS(serializedArray[0].getDocument().toBson(), + fromjson("{$project: {_id: true, a: {$const: true}}}")); }; -/** Projection spec is not an object. 
*/ -class NonObjectSpec : public Base { -public: - void run() { - BSONObj spec = BSON("$project" - << "foo"); - BSONElement specElement = spec.firstElement(); - ASSERT_THROWS(DocumentSourceProject::createFromBson(specElement, ctx()), UserException); - } +TEST_F(ProjectStageTest, ShouldErrorOnNonObjectSpec) { + // Can't use createProject() helper because we want to give a non-object spec. + BSONObj spec = BSON("$project" + << "foo"); + BSONElement specElement = spec.firstElement(); + ASSERT_THROWS(DocumentSourceProject::createFromBson(specElement, ctx()), UserException); }; -/** Projection spec is an empty object. */ -class EmptyObjectSpec : public Base { -public: - void run() { - ASSERT_THROWS(createProject(BSONObj()), UserException); - } -}; +/** + * Basic sanity check that two documents can be projected correctly with a simple inclusion + * projection. + */ +TEST_F(ProjectStageTest, InclusionShouldBeAbleToProcessMultipleDocuments) { + createProject(BSON("a" << true)); + auto source = DocumentSourceMock::create({"{a: 1, b: 2}", "{a: 3, b: 4}"}); + project()->setSource(source.get()); + boost::optional<Document> next = project()->getNext(); + ASSERT(bool(next)); + ASSERT_EQUALS(1, next->getField("a").getInt()); + ASSERT(next->getField("b").missing()); + + next = project()->getNext(); + ASSERT(bool(next)); + ASSERT_EQUALS(3, next->getField("a").getInt()); + ASSERT(next->getField("b").missing()); -/** Projection spec contains a top level dollar sign. */ -class TopLevelDollar : public Base { -public: - void run() { - ASSERT_THROWS(createProject(BSON("$add" << BSONArray())), UserException); - } + assertExhausted(); }; -/** Projection spec is invalid. */ -class InvalidSpec : public Base { -public: - void run() { - ASSERT_THROWS(createProject(BSON("a" << BSON("$invalidOperator" << 1))), UserException); - } +/** + * Basic sanity check that two documents can be projected correctly with a simple exclusion
+ */ +TEST_F(ProjectStageTest, ExclusionShouldBeAbleToProcessMultipleDocuments) { + createProject(BSON("a" << false)); + auto source = DocumentSourceMock::create({"{a: 1, b: 2}", "{a: 3, b: 4}"}); + project()->setSource(source.get()); + boost::optional<Document> next = project()->getNext(); + ASSERT(bool(next)); + ASSERT(next->getField("a").missing()); + ASSERT_EQUALS(2, next->getField("b").getInt()); + + next = project()->getNext(); + ASSERT(bool(next)); + ASSERT(next->getField("a").missing()); + ASSERT_EQUALS(4, next->getField("b").getInt()); + + assertExhausted(); }; -/** Two documents are projected. */ -class TwoDocuments : public Base { -public: - void run() { - createProject(); - auto source = DocumentSourceMock::create({"{a: 1, b:2}", "{a: 3, b: 4}"}); - project()->setSource(source.get()); - boost::optional<Document> next = project()->getNext(); - ASSERT(bool(next)); - ASSERT_EQUALS(1, next->getField("a").getInt()); - ASSERT(next->getField("b").missing()); +TEST_F(ProjectStageTest, InclusionShouldAddDependenciesOfIncludedAndComputedFields) { + createProject(fromjson("{a: true, x: '$b', y: {$and: ['$c','$d']}, z: {$meta: 'textScore'}}")); + DepsTracker dependencies; + ASSERT_EQUALS(DocumentSource::EXHAUSTIVE_FIELDS, project()->getDependencies(&dependencies)); + ASSERT_EQUALS(5U, dependencies.fields.size()); - next = project()->getNext(); - ASSERT(bool(next)); - ASSERT_EQUALS(3, next->getField("a").getInt()); - ASSERT(next->getField("b").missing()); + // Implicit _id dependency. + ASSERT_EQUALS(1U, dependencies.fields.count("_id")); - assertExhausted(); - } + // Inclusion dependency. + ASSERT_EQUALS(1U, dependencies.fields.count("a")); + + // Field path expression dependency. + ASSERT_EQUALS(1U, dependencies.fields.count("b")); + + // Nested expression dependencies. 
+ ASSERT_EQUALS(1U, dependencies.fields.count("c")); + ASSERT_EQUALS(1U, dependencies.fields.count("d")); + ASSERT_EQUALS(false, dependencies.needWholeDocument); + ASSERT_EQUALS(true, dependencies.needTextScore); }; -/** List of dependent field paths. */ -class Dependencies : public Base { -public: - void run() { - createProject(fromjson("{a:true,x:'$b',y:{$and:['$c','$d']}, z: {$meta:'textScore'}}")); - DepsTracker dependencies; - ASSERT_EQUALS(DocumentSource::EXHAUSTIVE_FIELDS, project()->getDependencies(&dependencies)); - ASSERT_EQUALS(5U, dependencies.fields.size()); - // Implicit _id dependency. - ASSERT_EQUALS(1U, dependencies.fields.count("_id")); - // Inclusion dependency. - ASSERT_EQUALS(1U, dependencies.fields.count("a")); - // Field path expression dependency. - ASSERT_EQUALS(1U, dependencies.fields.count("b")); - // Nested expression dependencies. - ASSERT_EQUALS(1U, dependencies.fields.count("c")); - ASSERT_EQUALS(1U, dependencies.fields.count("d")); - ASSERT_EQUALS(false, dependencies.needWholeDocument); - ASSERT_EQUALS(true, dependencies.needTextScore); - } +TEST_F(ProjectStageTest, ExclusionShouldNotAddDependencies) { + createProject(fromjson("{a: false, 'b.c': false}")); + + DepsTracker dependencies; + ASSERT_EQUALS(DocumentSource::SEE_NEXT, project()->getDependencies(&dependencies)); + + ASSERT_EQUALS(0U, dependencies.fields.size()); + ASSERT_EQUALS(false, dependencies.needWholeDocument); + ASSERT_EQUALS(false, dependencies.needTextScore); }; } // namespace DocumentSourceProject @@ -3666,15 +3674,6 @@ public: add<DocumentSourceGroup::StreamingWithFieldRepeated>(); #endif - add<DocumentSourceProject::Inclusion>(); - add<DocumentSourceProject::Optimize>(); - add<DocumentSourceProject::NonObjectSpec>(); - add<DocumentSourceProject::EmptyObjectSpec>(); - add<DocumentSourceProject::TopLevelDollar>(); - add<DocumentSourceProject::InvalidSpec>(); - add<DocumentSourceProject::TwoDocuments>(); - add<DocumentSourceProject::Dependencies>(); - 
add<DocumentSourceSort::Empty>(); add<DocumentSourceSort::SingleValue>(); add<DocumentSourceSort::TwoValues>(); diff --git a/src/mongo/db/pipeline/document_source_unwind.cpp b/src/mongo/db/pipeline/document_source_unwind.cpp index 58a4d979146..b02723c1c94 100644 --- a/src/mongo/db/pipeline/document_source_unwind.cpp +++ b/src/mongo/db/pipeline/document_source_unwind.cpp @@ -230,11 +230,10 @@ Pipeline::SourceContainer::iterator DocumentSourceUnwind::optimizeAt( invariant(*itr == this); if (auto nextMatch = dynamic_cast<DocumentSourceMatch*>((*std::next(itr)).get())) { - const bool includeDollarPrefix = false; - std::set<std::string> fields = {_unwindPath.getPath(includeDollarPrefix)}; + std::set<std::string> fields = {_unwindPath.fullPath()}; if (_indexPath) { - fields.insert((*_indexPath).getPath(false)); + fields.insert((*_indexPath).fullPath()); } auto splitMatch = nextMatch->splitSourceBy(fields); @@ -267,14 +266,14 @@ Pipeline::SourceContainer::iterator DocumentSourceUnwind::optimizeAt( Value DocumentSourceUnwind::serialize(bool explain) const { return Value(DOC(getSourceName() << DOC( - "path" << _unwindPath.getPath(true) << "preserveNullAndEmptyArrays" + "path" << _unwindPath.fullPathWithPrefix() << "preserveNullAndEmptyArrays" << (_preserveNullAndEmptyArrays ? Value(true) : Value()) << "includeArrayIndex" - << (_indexPath ? Value((*_indexPath).getPath(false)) : Value())))); + << (_indexPath ? 
Value((*_indexPath).fullPath()) : Value())))); } DocumentSource::GetDepsReturn DocumentSourceUnwind::getDependencies(DepsTracker* deps) const { - deps->fields.insert(_unwindPath.getPath(false)); + deps->fields.insert(_unwindPath.fullPath()); return SEE_NEXT; } diff --git a/src/mongo/db/pipeline/expression.cpp b/src/mongo/db/pipeline/expression.cpp index 1c976df8f42..ad958e837a1 100644 --- a/src/mongo/db/pipeline/expression.cpp +++ b/src/mongo/db/pipeline/expression.cpp @@ -52,7 +52,9 @@ using Parser = Expression::Parser; using namespace mongoutils; using boost::intrusive_ptr; +using std::map; using std::move; +using std::pair; using std::set; using std::string; using std::vector; @@ -171,20 +173,6 @@ Variables::Id VariablesParseState::getVariable(StringData name) const { /* --------------------------- Expression ------------------------------ */ -Expression::ObjectCtx::ObjectCtx(int theOptions) : options(theOptions) {} - -bool Expression::ObjectCtx::documentOk() const { - return ((options & DOCUMENT_OK) != 0); -} - -bool Expression::ObjectCtx::topLevel() const { - return ((options & TOP_LEVEL) != 0); -} - -bool Expression::ObjectCtx::inclusionOk() const { - return ((options & INCLUSION_OK) != 0); -} - string Expression::removeFieldPrefix(const string& prefixedField) { uassert(16419, str::stream() << "field path must not contain embedded null characters" @@ -201,123 +189,17 @@ string Expression::removeFieldPrefix(const string& prefixedField) { return string(pPrefixedField + 1); } -intrusive_ptr<Expression> Expression::parseObject(BSONObj obj, - ObjectCtx* pCtx, - const VariablesParseState& vps) { - /* - An object expression can take any of the following forms: - - f0: {f1: ..., f2: ..., f3: ...} - f0: {$operator:[operand1, operand2, ...]} - */ - - intrusive_ptr<Expression> pExpression; // the result - intrusive_ptr<ExpressionObject> pExpressionObject; // alt result - enum { UNKNOWN, NOTOPERATOR, OPERATOR } kind = UNKNOWN; - - if (obj.isEmpty()) - return 
ExpressionObject::create(); - BSONObjIterator iter(obj); - - for (size_t fieldCount = 0; iter.more(); ++fieldCount) { - BSONElement fieldElement(iter.next()); - const char* pFieldName = fieldElement.fieldName(); - - if (pFieldName[0] == '$') { - uassert( - 15983, - str::stream() << "the operator must be the only field in a pipeline object (at '" - << pFieldName - << "'", - fieldCount == 0); - - uassert(16404, - "$expressions are not allowed at the top-level of $project", - !pCtx->topLevel()); - - /* we've determined this "object" is an operator expression */ - kind = OPERATOR; - - pExpression = parseExpression(fieldElement, vps); - } else { - uassert(15990, - str::stream() << "this object is already an operator expression, and can't be " - "used as a document expression (at '" - << pFieldName - << "')", - kind != OPERATOR); - - uassert(16405, - "dotted field names are only allowed at the top level", - pCtx->topLevel() || !str::contains(pFieldName, '.')); - - /* if it's our first time, create the document expression */ - if (!pExpression.get()) { - verify(pCtx->documentOk()); - // CW TODO error: document not allowed in this context - - pExpressionObject = - pCtx->topLevel() ? ExpressionObject::createRoot() : ExpressionObject::create(); - pExpression = pExpressionObject; - - /* this "object" is not an operator expression */ - kind = NOTOPERATOR; - } +intrusive_ptr<Expression> Expression::parseObject(BSONObj obj, const VariablesParseState& vps) { + if (obj.isEmpty()) { + return ExpressionObject::create({}); + } - BSONType fieldType = fieldElement.type(); - string fieldName(pFieldName); - switch (fieldType) { - case Object: { - /* it's a nested document */ - ObjectCtx oCtx((pCtx->documentOk() ? ObjectCtx::DOCUMENT_OK : 0) | - (pCtx->inclusionOk() ? 
ObjectCtx::INCLUSION_OK : 0)); - - pExpressionObject->addField(fieldName, - parseObject(fieldElement.Obj(), &oCtx, vps)); - break; - } - case String: { - /* it's a renamed field */ - // CW TODO could also be a constant - pExpressionObject->addField( - fieldName, ExpressionFieldPath::parse(fieldElement.str(), vps)); - break; - } - case Array: - pExpressionObject->addField(fieldName, - ExpressionArray::parse(fieldElement, vps)); - break; - case Bool: - case NumberDouble: - case NumberLong: - case NumberInt: { - /* it's an inclusion specification */ - if (fieldElement.trueValue()) { - uassert(16420, - "field inclusion is not allowed inside of $expressions", - pCtx->inclusionOk()); - pExpressionObject->includePath(fieldName); - } else { - uassert(16406, - "The top-level _id field is the only field currently supported for " - "exclusion", - pCtx->topLevel() && fieldName == "_id"); - pExpressionObject->excludeId(true); - } - break; - } - default: - uassert(15992, - str::stream() << "disallowed field type " << typeName(fieldType) - << " in object expression (at '" - << fieldName - << "')", - false); - } - } + if (obj.firstElementFieldName()[0] == '$') { + // Assume this is an expression like {$add: [...]}. + return parseExpression(obj, vps); } - return pExpression; + return ExpressionObject::parse(obj, vps); } namespace { @@ -332,12 +214,20 @@ void Expression::registerExpression(string key, Parser parser) { parserMap[key] = parser; } -intrusive_ptr<Expression> Expression::parseExpression(BSONElement exprElement, - const VariablesParseState& vps) { - const char* opName = exprElement.fieldName(); +intrusive_ptr<Expression> Expression::parseExpression(BSONObj obj, const VariablesParseState& vps) { + uassert(15983, + str::stream() << "An object representing an expression must have exactly one " + "field: " + << obj.toString(), + obj.nFields() == 1); + + // Look up the parser associated with the expression name. 
+ const char* opName = obj.firstElementFieldName(); auto op = parserMap.find(opName); - uassert(15999, str::stream() << "invalid operator '" << opName << "'", op != parserMap.end()); - return op->second(exprElement, vps); + uassert(15999, + str::stream() << "Unrecognized expression '" << opName << "'", + op != parserMap.end()); + return op->second(obj.firstElement(), vps); } Expression::ExpressionVector ExpressionNary::parseArguments(BSONElement exprElement, @@ -347,7 +237,7 @@ Expression::ExpressionVector ExpressionNary::parseArguments(BSONElement exprElem BSONForEach(elem, exprElement.Obj()) { out.push_back(Expression::parseOperand(elem, vps)); } - } else { // assume it's an atomic operand + } else { // Assume it's an operand that accepts a single argument. out.push_back(Expression::parseOperand(exprElement, vps)); } @@ -362,8 +252,7 @@ intrusive_ptr<Expression> Expression::parseOperand(BSONElement exprElement, /* if we got here, this is a field path expression */ return ExpressionFieldPath::parse(exprElement.str(), vps); } else if (type == Object) { - ObjectCtx oCtx(ObjectCtx::DOCUMENT_OK); - return Expression::parseObject(exprElement.Obj(), &oCtx, vps); + return Expression::parseObject(exprElement.Obj(), vps); } else if (type == Array) { return ExpressionArray::parse(exprElement, vps); } else { @@ -745,7 +634,7 @@ intrusive_ptr<Expression> ExpressionCoerceToBool::optimize() { return intrusive_ptr<Expression>(this); } -void ExpressionCoerceToBool::addDependencies(DepsTracker* deps, vector<string>* path) const { +void ExpressionCoerceToBool::addDependencies(DepsTracker* deps) const { pExpression->addDependencies(deps); } @@ -978,7 +867,7 @@ intrusive_ptr<Expression> ExpressionConstant::optimize() { return intrusive_ptr<Expression>(this); } -void ExpressionConstant::addDependencies(DepsTracker* deps, vector<string>* path) const { +void ExpressionConstant::addDependencies(DepsTracker* deps) const { /* nothing to do */ } @@ -1191,7 +1080,7 @@ void 
ExpressionDateToString::insertPadded(StringBuilder& sb, int number, int wid sb << number; } -void ExpressionDateToString::addDependencies(DepsTracker* deps, vector<string>* path) const { +void ExpressionDateToString::addDependencies(DepsTracker* deps) const { _date->addDependencies(deps); } @@ -1286,284 +1175,64 @@ const char* ExpressionExp::getOpName() const { /* ---------------------- ExpressionObject --------------------------- */ -intrusive_ptr<ExpressionObject> ExpressionObject::create() { - return new ExpressionObject(false); -} +ExpressionObject::ExpressionObject(vector<pair<string, intrusive_ptr<Expression>>>&& expressions) + : _expressions(std::move(expressions)) {} -intrusive_ptr<ExpressionObject> ExpressionObject::createRoot() { - return new ExpressionObject(true); +intrusive_ptr<ExpressionObject> ExpressionObject::create( + vector<pair<string, intrusive_ptr<Expression>>>&& expressions) { + return new ExpressionObject(std::move(expressions)); } -ExpressionObject::ExpressionObject(bool atRoot) : _excludeId(false), _atRoot(atRoot) {} - -intrusive_ptr<Expression> ExpressionObject::optimize() { - for (FieldMap::iterator it(_expressions.begin()); it != _expressions.end(); ++it) { - if (it->second) - it->second = it->second->optimize(); - } - - return intrusive_ptr<Expression>(this); -} +intrusive_ptr<ExpressionObject> ExpressionObject::parse(BSONObj obj, + const VariablesParseState& vps) { + // Make sure we don't have any duplicate field names. + std::unordered_set<string> specifiedFields; -bool ExpressionObject::isSimple() { - for (FieldMap::iterator it(_expressions.begin()); it != _expressions.end(); ++it) { - if (it->second && !it->second->isSimple()) - return false; - } - return true; -} + vector<pair<string, intrusive_ptr<Expression>>> expressions; + for (auto&& elem : obj) { + // Make sure this element has a valid field name. Use StringData here so that we can detect + // if the field name contains a null byte. 
+ FieldPath::uassertValidFieldName(elem.fieldNameStringData()); -void ExpressionObject::addDependencies(DepsTracker* deps, vector<string>* path) const { - string pathStr; - if (path) { - if (path->empty()) { - // we are in the top level of a projection so _id is implicit - if (!_excludeId) - deps->fields.insert("_id"); - } else { - FieldPath f(*path); - pathStr = f.getPath(false); - pathStr += '.'; - } - } else { - verify(!_excludeId); + auto fieldName = elem.fieldName(); + uassert(16406, + str::stream() << "duplicate field name specified in object literal: " + << obj.toString(), + specifiedFields.find(fieldName) == specifiedFields.end()); + specifiedFields.insert(fieldName); + expressions.emplace_back(fieldName, parseOperand(elem, vps)); } - - for (FieldMap::const_iterator it(_expressions.begin()); it != _expressions.end(); ++it) { - if (it->second) { - if (path) - path->push_back(it->first); - it->second->addDependencies(deps, path); - if (path) - path->pop_back(); - } else { // inclusion - uassert(16407, "inclusion not supported in objects nested in $expressions", path); - - deps->fields.insert(pathStr + it->first); - } - } + return new ExpressionObject{std::move(expressions)}; } -void ExpressionObject::addToDocument(MutableDocument& out, - const Document& currentDoc, - Variables* vars) const { - FieldMap::const_iterator end = _expressions.end(); - - // This is used to mark fields we've done so that we can add the ones we haven't - set<string> doneFields; - - FieldIterator fields(currentDoc); - while (fields.more()) { - Document::FieldPair field(fields.next()); - - // TODO don't make a new string here - const string fieldName = field.first.toString(); - FieldMap::const_iterator exprIter = _expressions.find(fieldName); - - // This field is not supposed to be in the output (unless it is _id) - if (exprIter == end) { - if (!_excludeId && _atRoot && field.first == "_id") { - // _id from the root doc is always included (until exclusion is supported) - // not updating 
doneFields since "_id" isn't in _expressions - out.addField(field.first, field.second); - } - continue; - } - - // make sure we don't add this field again - doneFields.insert(exprIter->first); - - Expression* expr = exprIter->second.get(); - - if (!expr) { - // This means pull the matching field from the input document - out.addField(field.first, field.second); - continue; - } - - ExpressionObject* exprObj = dynamic_cast<ExpressionObject*>(expr); - BSONType valueType = field.second.getType(); - if ((valueType != Object && valueType != Array) || !exprObj) { - // This expression replace the whole field - - Value pValue(expr->evaluateInternal(vars)); - - // don't add field if nothing was found in the subobject - if (exprObj && pValue.getDocument().empty()) - continue; - - /* - Don't add non-existent values (note: different from NULL or Undefined); - this is consistent with existing selection syntax which doesn't - force the appearance of non-existent fields. - */ - if (!pValue.missing()) - out.addField(field.first, pValue); - - continue; - } - - /* - Check on the type of the input value. If it's an - object, just walk down into that recursively, and - add it to the result. - */ - if (valueType == Object) { - MutableDocument sub(exprObj->getSizeHint()); - exprObj->addToDocument(sub, field.second.getDocument(), vars); - out.addField(field.first, sub.freezeToValue()); - } else if (valueType == Array) { - /* - If it's an array, we have to do the same thing, - but to each array element. Then, add the array - of results to the current document. - */ - vector<Value> result; - const vector<Value>& input = field.second.getArray(); - for (size_t i = 0; i < input.size(); i++) { - // can't look for a subfield in a non-object value. 
- if (input[i].getType() != Object) - continue; - - MutableDocument doc(exprObj->getSizeHint()); - exprObj->addToDocument(doc, input[i].getDocument(), vars); - result.push_back(doc.freezeToValue()); - } - - out.addField(field.first, Value(std::move(result))); - } else { - verify(false); - } - } - - if (doneFields.size() == _expressions.size()) - return; - - /* add any remaining fields we haven't already taken care of */ - for (vector<string>::const_iterator i(_order.begin()); i != _order.end(); ++i) { - FieldMap::const_iterator it = _expressions.find(*i); - string fieldName(it->first); - - /* if we've already dealt with this field, above, do nothing */ - if (doneFields.count(fieldName)) - continue; - - // this is a missing inclusion field - if (!it->second) - continue; - - Value pValue(it->second->evaluateInternal(vars)); - - /* - Don't add non-existent values (note: different from NULL or Undefined); - this is consistent with existing selection syntax which doesn't - force the appearnance of non-existent fields. - */ - if (pValue.missing()) - continue; - - // don't add field if nothing was found in the subobject - if (dynamic_cast<ExpressionObject*>(it->second.get()) && pValue.getDocument().empty()) - continue; - - out.addField(fieldName, pValue); +intrusive_ptr<Expression> ExpressionObject::optimize() { + for (auto&& pair : _expressions) { + pair.second = pair.second->optimize(); } + return this; } -size_t ExpressionObject::getSizeHint() const { - // Note: this can overestimate, but that is better than underestimating - return _expressions.size() + (_excludeId ? 0 : 1); -} - -Document ExpressionObject::evaluateDocument(Variables* vars) const { - /* create and populate the result */ - MutableDocument out(getSizeHint()); - - addToDocument(out, - Document(), // No inclusion field matching. 
- vars); - return out.freeze(); +void ExpressionObject::addDependencies(DepsTracker* deps) const { + for (auto&& pair : _expressions) { + pair.second->addDependencies(deps); + } } Value ExpressionObject::evaluateInternal(Variables* vars) const { - return Value(evaluateDocument(vars)); -} - -void ExpressionObject::addField(const FieldPath& fieldPath, - const intrusive_ptr<Expression>& pExpression) { - const string fieldPart = fieldPath.getFieldName(0); - const bool haveExpr = _expressions.count(fieldPart); - - intrusive_ptr<Expression>& expr = _expressions[fieldPart]; // inserts if !haveExpr - intrusive_ptr<ExpressionObject> subObj = dynamic_cast<ExpressionObject*>(expr.get()); - - if (!haveExpr) { - _order.push_back(fieldPart); - } else { // we already have an expression or inclusion for this field - if (fieldPath.getPathLength() == 1) { - // This expression is for right here - - ExpressionObject* newSubObj = dynamic_cast<ExpressionObject*>(pExpression.get()); - uassert(16400, - str::stream() << "can't add an expression for field " << fieldPart - << " because there is already an expression for that field" - << " or one of its sub-fields.", - subObj && newSubObj); // we can merge them - - // Copy everything from the newSubObj to the existing subObj - // This is for cases like { $project:{ 'b.c':1, b:{ a:1 } } } - for (vector<string>::const_iterator it(newSubObj->_order.begin()); - it != newSubObj->_order.end(); - ++it) { - // asserts if any fields are dupes - subObj->addField(*it, newSubObj->_expressions[*it]); - } - return; - } else { - // This expression is for a subfield - uassert(16401, - str::stream() << "can't add an expression for a subfield of " << fieldPart - << " because there is already an expression that applies to" - << " the whole field", - subObj); - } - } - - if (fieldPath.getPathLength() == 1) { - verify(!haveExpr); // haveExpr case handled above. 
- expr = pExpression; - return; + MutableDocument outputDoc; + for (auto&& pair : _expressions) { + outputDoc.setNestedField(FieldPath(pair.first), pair.second->evaluateInternal(vars)); } - - if (!haveExpr) - expr = subObj = ExpressionObject::create(); - - subObj->addField(fieldPath.tail(), pExpression); -} - -void ExpressionObject::includePath(const string& theFieldPath) { - addField(theFieldPath, NULL); + return outputDoc.freezeToValue(); } Value ExpressionObject::serialize(bool explain) const { - MutableDocument valBuilder; - if (_excludeId) - valBuilder["_id"] = Value(false); - - for (vector<string>::const_iterator it(_order.begin()); it != _order.end(); ++it) { - string fieldName = *it; - verify(_expressions.find(fieldName) != _expressions.end()); - intrusive_ptr<Expression> expr = _expressions.find(fieldName)->second; - - if (!expr) { - // this is inclusion, not an expression - valBuilder[fieldName] = Value(true); - } else { - valBuilder[fieldName] = expr->serialize(explain); - } + MutableDocument outputDoc; + for (auto&& pair : _expressions) { + outputDoc.setNestedField(FieldPath(pair.first), pair.second->serialize(explain)); } - return valBuilder.freezeToValue(); + return outputDoc.freezeToValue(); } /* --------------------- ExpressionFieldPath --------------------------- */ @@ -1605,12 +1274,12 @@ intrusive_ptr<Expression> ExpressionFieldPath::optimize() { return intrusive_ptr<Expression>(this); } -void ExpressionFieldPath::addDependencies(DepsTracker* deps, vector<string>* path) const { +void ExpressionFieldPath::addDependencies(DepsTracker* deps) const { if (_variable == Variables::ROOT_ID) { // includes CURRENT when it is equivalent to ROOT. 
if (_fieldPath.getPathLength() == 1) { deps->needWholeDocument = true; // need full doc if just "$$ROOT" } else { - deps->fields.insert(_fieldPath.tail().getPath(false)); + deps->fields.insert(_fieldPath.tail().fullPath()); } } } @@ -1677,9 +1346,9 @@ Value ExpressionFieldPath::evaluateInternal(Variables* vars) const { Value ExpressionFieldPath::serialize(bool explain) const { if (_fieldPath.getFieldName(0) == "CURRENT" && _fieldPath.getPathLength() > 1) { // use short form for "$$CURRENT.foo" but not just "$$CURRENT" - return Value("$" + _fieldPath.tail().getPath(false)); + return Value("$" + _fieldPath.tail().fullPath()); } else { - return Value("$$" + _fieldPath.getPath(false)); + return Value("$$" + _fieldPath.fullPath()); } } @@ -1778,7 +1447,7 @@ Value ExpressionFilter::evaluateInternal(Variables* vars) const { return Value(std::move(output)); } -void ExpressionFilter::addDependencies(DepsTracker* deps, vector<string>* path) const { +void ExpressionFilter::addDependencies(DepsTracker* deps) const { _input->addDependencies(deps); _filter->addDependencies(deps); } @@ -1888,7 +1557,7 @@ Value ExpressionLet::evaluateInternal(Variables* vars) const { return _subExpression->evaluateInternal(vars); } -void ExpressionLet::addDependencies(DepsTracker* deps, vector<string>* path) const { +void ExpressionLet::addDependencies(DepsTracker* deps) const { for (VariableMap::const_iterator it = _variables.begin(), end = _variables.end(); it != end; ++it) { it->second.expression->addDependencies(deps); @@ -1992,7 +1661,7 @@ Value ExpressionMap::evaluateInternal(Variables* vars) const { return Value(std::move(output)); } -void ExpressionMap::addDependencies(DepsTracker* deps, vector<string>* path) const { +void ExpressionMap::addDependencies(DepsTracker* deps) const { _input->addDependencies(deps); _each->addDependencies(deps); } @@ -2037,7 +1706,7 @@ Value ExpressionMeta::evaluateInternal(Variables* vars) const { MONGO_UNREACHABLE; } -void 
ExpressionMeta::addDependencies(DepsTracker* deps, vector<string>* path) const { +void ExpressionMeta::addDependencies(DepsTracker* deps) const { if (_metaType == MetaType::TEXT_SCORE) { deps->needTextScore = true; } @@ -2659,9 +2328,9 @@ intrusive_ptr<Expression> ExpressionNary::optimize() { return this; } -void ExpressionNary::addDependencies(DepsTracker* deps, vector<string>* path) const { - for (ExpressionVector::const_iterator i(vpOperand.begin()); i != vpOperand.end(); ++i) { - (*i)->addDependencies(deps); +void ExpressionNary::addDependencies(DepsTracker* deps) const { + for (auto&& operand : vpOperand) { + operand->addDependencies(deps); } } @@ -3052,7 +2721,7 @@ intrusive_ptr<Expression> ExpressionReduce::optimize() { return this; } -void ExpressionReduce::addDependencies(DepsTracker* deps, vector<string>* path) const { +void ExpressionReduce::addDependencies(DepsTracker* deps) const { _input->addDependencies(deps); _initial->addDependencies(deps); _in->addDependencies(deps); @@ -3889,14 +3558,14 @@ boost::intrusive_ptr<Expression> ExpressionSwitch::parse(BSONElement expr, return expression; } -void ExpressionSwitch::addDependencies(DepsTracker* deps, std::vector<std::string>* path) const { +void ExpressionSwitch::addDependencies(DepsTracker* deps) const { for (auto&& branch : _branches) { - branch.first->addDependencies(deps, path); - branch.second->addDependencies(deps, path); + branch.first->addDependencies(deps); + branch.second->addDependencies(deps); } if (_default) { - _default->addDependencies(deps, path); + _default->addDependencies(deps); } } @@ -4347,7 +4016,7 @@ Value ExpressionZip::serialize(bool explain) const { << serializedUseLongestLength))); } -void ExpressionZip::addDependencies(DepsTracker* deps, std::vector<std::string>* path) const { +void ExpressionZip::addDependencies(DepsTracker* deps) const { std::for_each( _inputs.begin(), _inputs.end(), [&deps](intrusive_ptr<Expression> inputExpression) -> void { 
inputExpression->addDependencies(deps); diff --git a/src/mongo/db/pipeline/expression.h b/src/mongo/db/pipeline/expression.h index 8ed6f95f6eb..c4ac4b07704 100644 --- a/src/mongo/db/pipeline/expression.h +++ b/src/mongo/db/pipeline/expression.h @@ -64,7 +64,7 @@ class DocumentSource; return Status::OK(); \ } -// TODO: Look into merging with ExpressionContext and possibly ObjectCtx. +// TODO: Look into merging with ExpressionContext. /// The state used as input and working space for Expressions. class Variables { MONGO_DISALLOW_COPYING(Variables); @@ -176,121 +176,82 @@ public: virtual ~Expression(){}; - /* - Optimize the Expression. - - This provides an opportunity to do constant folding, or to - collapse nested operators that have the same precedence, such as - $add, $and, or $or. - - The Expression should be replaced with the return value, which may - or may not be the same object. In the case of constant folding, - a computed expression may be replaced by a constant. - - @returns the optimized Expression + /** + * Optimize the Expression. + * + * This provides an opportunity to do constant folding, or to collapse nested operators that + * have the same precedence, such as $add, $and, or $or. + * + * The Expression will be replaced with the return value, which may or may not be the same + * object. In the case of constant folding, a computed expression may be replaced by a constant. + * + * Returns the optimized Expression. */ virtual boost::intrusive_ptr<Expression> optimize() { return this; } /** - * Add this expression's field dependencies to the set + * Add the fields used as input to this expression to 'deps'. * * Expressions are trees, so this is often recursive. - * - * @param deps Fully qualified paths to depended-on fields are added to this set. - * Empty std::string means need full document. - * @param path path to self if all ancestors are ExpressionObjects. - * Top-level ExpressionObject gets pointer to empty vector. 
- * If any other Expression is an ancestor, or in other cases - * where {a:1} inclusion objects aren't allowed, they get - * NULL. */ - virtual void addDependencies(DepsTracker* deps, - std::vector<std::string>* path = NULL) const = 0; - - /** simple expressions are just inclusion exclusion as supported by ExpressionObject */ - virtual bool isSimple() { - return false; - } - + virtual void addDependencies(DepsTracker* deps) const = 0; /** * Serialize the Expression tree recursively. - * If explain is false, returns a Value parsable by parseOperand(). + * + * If 'explain' is false, the returned Value must result in the same Expression when parsed by + * parseOperand(). */ virtual Value serialize(bool explain) const = 0; - /// Evaluate expression with specified inputs and return result. (only used by tests) + /** + * Evaluate expression with respect to the Document given by 'root', and return the result. + * + * This method should only be used for testing. + */ Value evaluate(const Document& root) const { Variables vars(0, root); return evaluate(&vars); } /** - * Evaluate expression with specified inputs and return result. + * Evaluate expression with variables given by 'vars', and return the result. * - * While vars is non-const, if properly constructed, subexpressions modifications to it - * should not effect outer expressions due to unique variable Ids. + * While vars is non-const, a subexpression's modifications to it should not effect outer + * Expressions, since variables defined in the subexpression's scope will be given unique + * variable ids. */ Value evaluate(Variables* vars) const { return evaluateInternal(vars); } - /* - Utility class for parseObject() below. - - DOCUMENT_OK indicates that it is OK to use a Document in the current - context. 
- */ - class ObjectCtx { - public: - explicit ObjectCtx(int options); - static const int DOCUMENT_OK = 0x0001; - static const int TOP_LEVEL = 0x0002; - static const int INCLUSION_OK = 0x0004; - - bool documentOk() const; - bool topLevel() const; - bool inclusionOk() const; - - private: - int options; - }; - - // - // Diagram of relationship between parse functions when parsing a $op: - // - // { someFieldOrArrayIndex: { $op: [ARGS] } } - // ^ parseExpression on inner $op BSONElement - // ^ parseObject on BSONObject - // ^ parseOperand on outer BSONElement wrapping the $op Object - // - /** - * Parses a BSON Object that could represent a functional expression or a Document - * expression. + * Parses a BSON Object that could represent an object literal or a functional expression like + * $add. + * + * Calls parseExpression() on any sub-document (including possibly the entire document) which + * consists of a single field name starting with a '$'. */ static boost::intrusive_ptr<Expression> parseObject(BSONObj obj, - ObjectCtx* pCtx, const VariablesParseState& vps); /** - * Parses a BSONElement which has already been determined to be functional expression. + * Parses a BSONObj which has already been determined to be a functional expression. * - * exprElement should be the only element inside the expression object. That is the - * field name should be the $op for the expression. + * Throws an error if 'obj' does not contain exactly one field, or if that field's name does not + * match a registered expression name. */ - static boost::intrusive_ptr<Expression> parseExpression(BSONElement exprElement, + static boost::intrusive_ptr<Expression> parseExpression(BSONObj obj, const VariablesParseState& vps); - /** - * Parses a BSONElement which is an operand in an Expression. + * Parses a BSONElement which is an argument to an Expression. * - * This is the most generic parser and can parse ExpressionFieldPath, a literal, or a $op. 
- * If it is a $op, exprElement should be the outer element whose value is an Object - * containing the $op. + * An argument is allowed to be another expression, or a literal value, so this can call + * parseObject(), ExpressionFieldPath::parse(), ExpressionArray::parse(), or + * ExpressionConstant::parse() as necessary. */ static boost::intrusive_ptr<Expression> parseOperand(BSONElement exprElement, const VariablesParseState& vps); @@ -313,7 +274,7 @@ public: virtual Value evaluateInternal(Variables* vars) const = 0; /** - * Registers an Parser so it can be called from parseExpression and friends. + * Registers an Parser so it can be called from parseExpression. * * DO NOT call this method directly. Instead, use the REGISTER_EXPRESSION macro defined in this * file. @@ -330,7 +291,7 @@ class ExpressionNary : public Expression { public: boost::intrusive_ptr<Expression> optimize() override; Value serialize(bool explain) const override; - void addDependencies(DepsTracker* deps, std::vector<std::string>* path = NULL) const override; + void addDependencies(DepsTracker* deps) const override; /* Add an operand to the n-ary expression. 
@@ -572,7 +533,7 @@ public: class ExpressionCoerceToBool final : public Expression { public: boost::intrusive_ptr<Expression> optimize() final; - void addDependencies(DepsTracker* deps, std::vector<std::string>* path = NULL) const final; + void addDependencies(DepsTracker* deps) const final; Value evaluateInternal(Variables* vars) const final; Value serialize(bool explain) const final; @@ -652,7 +613,7 @@ public: class ExpressionConstant final : public Expression { public: boost::intrusive_ptr<Expression> optimize() final; - void addDependencies(DepsTracker* deps, std::vector<std::string>* path = NULL) const final; + void addDependencies(DepsTracker* deps) const final; Value evaluateInternal(Variables* vars) const final; Value serialize(bool explain) const final; @@ -682,7 +643,7 @@ public: boost::intrusive_ptr<Expression> optimize() final; Value serialize(bool explain) const final; Value evaluateInternal(Variables* vars) const final; - void addDependencies(DepsTracker* deps, std::vector<std::string>* path = NULL) const final; + void addDependencies(DepsTracker* deps) const final; static boost::intrusive_ptr<Expression> parse(BSONElement expr, const VariablesParseState& vps); @@ -754,7 +715,7 @@ class ExpressionExp final : public ExpressionSingleNumericArg<ExpressionExp> { class ExpressionFieldPath final : public Expression { public: boost::intrusive_ptr<Expression> optimize() final; - void addDependencies(DepsTracker* deps, std::vector<std::string>* path = NULL) const final; + void addDependencies(DepsTracker* deps) const final; Value evaluateInternal(Variables* vars) const final; Value serialize(bool explain) const final; @@ -812,7 +773,7 @@ public: boost::intrusive_ptr<Expression> optimize() final; Value serialize(bool explain) const final; Value evaluateInternal(Variables* vars) const final; - void addDependencies(DepsTracker* deps, std::vector<std::string>* path = NULL) const final; + void addDependencies(DepsTracker* deps) const final; static 
boost::intrusive_ptr<Expression> parse(BSONElement expr, const VariablesParseState& vps); @@ -894,7 +855,7 @@ public: boost::intrusive_ptr<Expression> optimize() final; Value serialize(bool explain) const final; Value evaluateInternal(Variables* vars) const final; - void addDependencies(DepsTracker* deps, std::vector<std::string>* path = NULL) const final; + void addDependencies(DepsTracker* deps) const final; static boost::intrusive_ptr<Expression> parse(BSONElement expr, const VariablesParseState& vps); @@ -936,7 +897,7 @@ public: boost::intrusive_ptr<Expression> optimize() final; Value serialize(bool explain) const final; Value evaluateInternal(Variables* vars) const final; - void addDependencies(DepsTracker* deps, std::vector<std::string>* path = NULL) const final; + void addDependencies(DepsTracker* deps) const final; static boost::intrusive_ptr<Expression> parse(BSONElement expr, const VariablesParseState& vps); @@ -957,7 +918,7 @@ class ExpressionMeta final : public Expression { public: Value serialize(bool explain) const final; Value evaluateInternal(Variables* vars) const final; - void addDependencies(DepsTracker* deps, std::vector<std::string>* path = NULL) const final; + void addDependencies(DepsTracker* deps) const final; static boost::intrusive_ptr<Expression> parse(BSONElement expr, const VariablesParseState& vps); @@ -1033,115 +994,45 @@ public: }; +/** + * This class is used to represent expressions that create object literals, such as the value of + * '_id' in this group stage: + * {$group: { + * _id: {b: "$a", c: {$add: [4, "$c"]}} <- This is represented as an ExpressionObject. + * ... + * }} + */ class ExpressionObject final : public Expression { public: - using FieldMap = std::map<std::string, boost::intrusive_ptr<Expression>>; boost::intrusive_ptr<Expression> optimize() final; - bool isSimple() final; - void addDependencies(DepsTracker* deps, std::vector<std::string>* path = NULL) const final; - /** Only evaluates non inclusion expressions. 
For inclusions, use addToDocument(). */ + void addDependencies(DepsTracker* deps) const final; Value evaluateInternal(Variables* vars) const final; Value serialize(bool explain) const final; - /// like evaluate(), but return a Document instead of a Value-wrapped Document. - Document evaluateDocument(Variables* vars) const; - - /** Evaluates with inclusions and adds results to passed in Mutable document - * - * @param output the MutableDocument to add the evaluated expressions to - * @param currentDoc the input Document for this level (for inclusions) - * @param vars the variables for use in subexpressions - */ - void addToDocument(MutableDocument& ouput, const Document& currentDoc, Variables* vars) const; - - // estimated number of fields that will be output - size_t getSizeHint() const; - - /** Create an empty expression. - * Until fields are added, this will evaluate to an empty document. - */ - static boost::intrusive_ptr<ExpressionObject> create(); - - /// Like create but uses special handling of _id for root object of $project. - static boost::intrusive_ptr<ExpressionObject> createRoot(); - - /* - Add a field to the document expression. - - @param fieldPath the path the evaluated expression will have in the - result Document - @param pExpression the expression to evaluate obtain this field's - Value in the result Document - */ - void addField(const FieldPath& fieldPath, const boost::intrusive_ptr<Expression>& pExpression); - - /* - Add a field path to the set of those to be included. - - Note that including a nested field implies including everything on - the path leading down to it. - - @param fieldPath the name of the field to be included - */ - void includePath(const std::string& fieldPath); - - /* - Get a count of the added fields. - - @returns how many fields have been added - */ - size_t getFieldCount() const { - return _expressions.size(); - }; - - /* - Specialized BSON conversion that allows for writing out a - $project specification. 
This creates a standalone object, which must - be added to a containing object with a name + static boost::intrusive_ptr<ExpressionObject> create( + std::vector<std::pair<std::string, boost::intrusive_ptr<Expression>>>&& expressions); - @param pBuilder where to write the object to - @param requireExpression see Expression::addToBsonObj + /** + * Parses and constructs an ExpressionObject from 'obj'. */ - void documentToBson(BSONObjBuilder* pBuilder, bool requireExpression) const; + static boost::intrusive_ptr<ExpressionObject> parse(BSONObj obj, + const VariablesParseState& vps); - /* - Visitor abstraction used by emitPaths(). Each path is recorded by - calling path(). + /** + * This ExpressionObject must outlive the returned vector. */ - class PathSink { - public: - virtual ~PathSink(){}; - - /** - Record a path. - - @param path the dotted path string - @param include if true, the path is included; if false, the path - is excluded - */ - virtual void path(const std::string& path, bool include) = 0; - }; - - void excludeId(bool b) { - _excludeId = b; - } - - const FieldMap& getChildExpressions() const { + const std::vector<std::pair<std::string, boost::intrusive_ptr<Expression>>>& + getChildExpressions() const { return _expressions; } private: - explicit ExpressionObject(bool atRoot); - - // Mapping from fieldname to the Expression that generates its value. - // NULL expression means inclusion from source document. - FieldMap _expressions; - - // this is used to maintain order for generated fields not in the source document - std::vector<std::string> _order; + ExpressionObject( + std::vector<std::pair<std::string, boost::intrusive_ptr<Expression>>>&& expressions); - bool _excludeId; - bool _atRoot; + // The mapping from field name to expression within this object. This needs to respect the order + // in which the fields were specified in the input BSON. 
+ std::vector<std::pair<std::string, boost::intrusive_ptr<Expression>>> _expressions; }; @@ -1174,7 +1065,7 @@ class ExpressionRange final : public ExpressionRangedArity<ExpressionRange, 2, 3 class ExpressionReduce final : public Expression { public: - void addDependencies(DepsTracker* deps, std::vector<std::string>* path = nullptr) const final; + void addDependencies(DepsTracker* deps) const final; Value evaluateInternal(Variables* vars) const final; boost::intrusive_ptr<Expression> optimize() final; static boost::intrusive_ptr<Expression> parse(BSONElement expr, @@ -1343,7 +1234,7 @@ public: class ExpressionSwitch final : public ExpressionFixedArity<ExpressionSwitch, 1> { public: - void addDependencies(DepsTracker* deps, std::vector<std::string>* path = nullptr) const final; + void addDependencies(DepsTracker* deps) const final; Value evaluateInternal(Variables* vars) const final; boost::intrusive_ptr<Expression> optimize() final; static boost::intrusive_ptr<Expression> parse(BSONElement expr, @@ -1438,7 +1329,7 @@ public: class ExpressionZip final : public ExpressionFixedArity<ExpressionZip, 1> { public: - void addDependencies(DepsTracker* deps, std::vector<std::string>* path = nullptr) const final; + void addDependencies(DepsTracker* deps) const final; Value evaluateInternal(Variables* vars) const final; boost::intrusive_ptr<Expression> optimize() final; static boost::intrusive_ptr<Expression> parse(BSONElement expr, diff --git a/src/mongo/db/pipeline/expression_test.cpp b/src/mongo/db/pipeline/expression_test.cpp index 2930d3fd1d2..d7437178cb9 100644 --- a/src/mongo/db/pipeline/expression_test.cpp +++ b/src/mongo/db/pipeline/expression_test.cpp @@ -60,8 +60,7 @@ static void assertExpectedResults(string expression, VariablesIdGenerator idGenerator; VariablesParseState vps(&idGenerator); const BSONObj obj = BSON(expression << Value(op.first)); - Value result = - Expression::parseExpression(obj.firstElement(), vps)->evaluate(Document()); + Value result = 
Expression::parseExpression(obj, vps)->evaluate(Document()); ASSERT_EQUALS(op.second, result); ASSERT_EQUALS(op.second.getType(), result.getType()); } catch (...) { @@ -126,6 +125,10 @@ Value valueFromBson(BSONObj obj) { return Value(element); } +template <typename T> +intrusive_ptr<ExpressionConstant> makeConstant(T&& val) { + return ExpressionConstant::create(Value(std::forward<T>(val))); +} class ExpressionBaseTest : public unittest::Test { public: @@ -257,11 +260,9 @@ TEST_F(ExpressionNaryTest, ValidateObjectExpressionDependency) { << "q" << "$r")); BSONElement specElement = spec.firstElement(); - Expression::ObjectCtx ctx(Expression::ObjectCtx::DOCUMENT_OK); VariablesIdGenerator idGenerator; VariablesParseState vps(&idGenerator); - _notAssociativeNorCommutative->addOperand( - Expression::parseObject(specElement.Obj(), &ctx, vps)); + _notAssociativeNorCommutative->addOperand(Expression::parseObject(specElement.Obj(), vps)); assertDependencies(_notAssociativeNorCommutative, BSON_ARRAY("r" << "x")); @@ -2326,872 +2327,222 @@ public: } // namespace FieldPath namespace Object { +using mongo::ExpressionObject; -class Base { -protected: - void assertDependencies(const BSONArray& expectedDependencies, - const intrusive_ptr<ExpressionObject>& expression, - bool includePath = true) const { - vector<string> path; - DepsTracker dependencies; - expression->addDependencies(&dependencies, includePath ? 
&path : 0); - BSONArrayBuilder bab; - for (set<string>::const_iterator i = dependencies.fields.begin(); - i != dependencies.fields.end(); - ++i) { - bab << *i; - } - ASSERT_EQUALS(expectedDependencies, bab.arr()); - ASSERT_EQUALS(false, dependencies.needWholeDocument); - ASSERT_EQUALS(false, dependencies.needTextScore); - } -}; - -class ExpectedResultBase : public Base { -public: - virtual ~ExpectedResultBase() {} - void run() { - _expression = ExpressionObject::createRoot(); - prepareExpression(); - Document document = fromBson(source()); - MutableDocument result; - Variables vars(0, document); - expression()->addToDocument(result, document, &vars); - assertBinaryEqual(expected(), toBson(result.freeze())); - assertDependencies(expectedDependencies(), _expression); - ASSERT_EQUALS(expectedBsonRepresentation(), expressionToBson(_expression)); - ASSERT_EQUALS(expectedIsSimple(), _expression->isSimple()); - } - -protected: - intrusive_ptr<ExpressionObject> expression() { - return _expression; - } - virtual BSONObj source() { - return BSON("_id" << 0 << "a" << 1 << "b" << 2); - } - virtual void prepareExpression() = 0; - virtual BSONObj expected() = 0; - virtual BSONArray expectedDependencies() = 0; - virtual BSONObj expectedBsonRepresentation() = 0; - virtual bool expectedIsSimple() { - return true; - } - -private: - intrusive_ptr<ExpressionObject> _expression; -}; - -/** Empty object spec. */ -class Empty : public ExpectedResultBase { -public: - void prepareExpression() {} - BSONObj expected() { - return BSON("_id" << 0); - } - BSONArray expectedDependencies() { - return BSON_ARRAY("_id"); - } - BSONObj expectedBsonRepresentation() { - return BSONObj(); - } -}; - -/** Include 'a' field only. 
*/ -class Include : public ExpectedResultBase { -public: - void prepareExpression() { - expression()->includePath("a"); - } - BSONObj expected() { - return BSON("_id" << 0 << "a" << 1); - } - BSONArray expectedDependencies() { - return BSON_ARRAY("_id" - << "a"); - } - BSONObj expectedBsonRepresentation() { - return BSON("a" << true); - } -}; - -/** Cannot include missing 'a' field. */ -class MissingInclude : public ExpectedResultBase { -public: - virtual BSONObj source() { - return BSON("_id" << 0 << "b" << 2); - } - void prepareExpression() { - expression()->includePath("a"); - } - BSONObj expected() { - return BSON("_id" << 0); - } - BSONArray expectedDependencies() { - return BSON_ARRAY("_id" - << "a"); - } - BSONObj expectedBsonRepresentation() { - return BSON("a" << true); - } -}; - -/** Include '_id' field only. */ -class IncludeId : public ExpectedResultBase { -public: - void prepareExpression() { - expression()->includePath("_id"); - } - BSONObj expected() { - return BSON("_id" << 0); - } - BSONArray expectedDependencies() { - return BSON_ARRAY("_id"); - } - BSONObj expectedBsonRepresentation() { - return BSON("_id" << true); - } -}; - -/** Exclude '_id' field. */ -class ExcludeId : public ExpectedResultBase { -public: - void prepareExpression() { - expression()->includePath("b"); - expression()->excludeId(true); - } - BSONObj expected() { - return BSON("b" << 2); - } - BSONArray expectedDependencies() { - return BSON_ARRAY("b"); - } - BSONObj expectedBsonRepresentation() { - return BSON("_id" << false << "b" << true); - } -}; - -/** Result order based on source document field order, not inclusion spec field - * order. 
*/ -class SourceOrder : public ExpectedResultBase { -public: - void prepareExpression() { - expression()->includePath("b"); - expression()->includePath("a"); - } - BSONObj expected() { - return source(); - } - BSONArray expectedDependencies() { - return BSON_ARRAY("_id" - << "a" - << "b"); - } - BSONObj expectedBsonRepresentation() { - return BSON("b" << true << "a" << true); - } -}; - -/** Include a nested field. */ -class IncludeNested : public ExpectedResultBase { -public: - void prepareExpression() { - expression()->includePath("a.b"); - } - BSONObj expected() { - return BSON("_id" << 0 << "a" << BSON("b" << 5)); - } - BSONObj source() { - return BSON("_id" << 0 << "a" << BSON("b" << 5 << "c" << 6) << "z" << 2); - } - BSONArray expectedDependencies() { - return BSON_ARRAY("_id" - << "a.b"); - } - BSONObj expectedBsonRepresentation() { - return BSON("a" << BSON("b" << true)); - } -}; - -/** Include two nested fields. */ -class IncludeTwoNested : public ExpectedResultBase { -public: - void prepareExpression() { - expression()->includePath("a.b"); - expression()->includePath("a.c"); - } - BSONObj expected() { - return BSON("_id" << 0 << "a" << BSON("b" << 5 << "c" << 6)); - } - BSONObj source() { - return BSON("_id" << 0 << "a" << BSON("b" << 5 << "c" << 6) << "z" << 2); - } - BSONArray expectedDependencies() { - return BSON_ARRAY("_id" - << "a.b" - << "a.c"); - } - BSONObj expectedBsonRepresentation() { - return BSON("a" << BSON("b" << true << "c" << true)); - } -}; - -/** Include two fields nested within different parents. 
*/ -class IncludeTwoParentNested : public ExpectedResultBase { -public: - void prepareExpression() { - expression()->includePath("a.b"); - expression()->includePath("c.d"); - } - BSONObj expected() { - return BSON("_id" << 0 << "a" << BSON("b" << 5) << "c" << BSON("d" << 6)); - } - BSONObj source() { - return BSON("_id" << 0 << "a" << BSON("b" << 5) << "c" << BSON("d" << 6) << "z" << 2); - } - BSONArray expectedDependencies() { - return BSON_ARRAY("_id" - << "a.b" - << "c.d"); - } - BSONObj expectedBsonRepresentation() { - return BSON("a" << BSON("b" << true) << "c" << BSON("d" << true)); - } -}; - -/** Attempt to include a missing nested field. */ -class IncludeMissingNested : public ExpectedResultBase { -public: - void prepareExpression() { - expression()->includePath("a.b"); - } - BSONObj expected() { - return BSON("_id" << 0 << "a" << BSONObj()); - } - BSONObj source() { - return BSON("_id" << 0 << "a" << BSON("c" << 6) << "z" << 2); - } - BSONArray expectedDependencies() { - return BSON_ARRAY("_id" - << "a.b"); - } - BSONObj expectedBsonRepresentation() { - return BSON("a" << BSON("b" << true)); - } -}; - -/** Attempt to include a nested field within a non object. */ -class IncludeNestedWithinNonObject : public ExpectedResultBase { -public: - void prepareExpression() { - expression()->includePath("a.b"); - } - BSONObj expected() { - return BSON("_id" << 0); - } - BSONObj source() { - return BSON("_id" << 0 << "a" << 2 << "z" << 2); - } - BSONArray expectedDependencies() { - return BSON_ARRAY("_id" - << "a.b"); - } - BSONObj expectedBsonRepresentation() { - return BSON("a" << BSON("b" << true)); - } -}; - -/** Include a nested field within an array. 
*/ -class IncludeArrayNested : public ExpectedResultBase { -public: - void prepareExpression() { - expression()->includePath("a.b"); - } - BSONObj expected() { - return fromjson("{_id:0,a:[{b:5},{b:2},{}]}"); - } - BSONObj source() { - return fromjson("{_id:0,a:[{b:5,c:6},{b:2,c:9},{c:7},[],2],z:1}"); - } - BSONArray expectedDependencies() { - return BSON_ARRAY("_id" - << "a.b"); - } - BSONObj expectedBsonRepresentation() { - return BSON("a" << BSON("b" << true)); - } -}; +template <typename T> +Document literal(T&& value) { + return Document{{"$const", Value(std::forward<T>(value))}}; +} -/** Don't include not root '_id' field implicitly. */ -class ExcludeNonRootId : public ExpectedResultBase { -public: - virtual BSONObj source() { - return BSON("_id" << 0 << "a" << BSON("_id" << 1 << "b" << 1)); - } - void prepareExpression() { - expression()->includePath("a.b"); - } - BSONObj expected() { - return BSON("_id" << 0 << "a" << BSON("b" << 1)); - } - BSONArray expectedDependencies() { - return BSON_ARRAY("_id" - << "a.b"); - } - BSONObj expectedBsonRepresentation() { - return BSON("a" << BSON("b" << true)); - } -}; +// +// Parsing. +// -/** Project a computed expression. */ -class Computed : public ExpectedResultBase { -public: - virtual BSONObj source() { - return BSON("_id" << 0); - } - void prepareExpression() { - expression()->addField(mongo::FieldPath("a"), ExpressionConstant::create(Value(5))); - } - BSONObj expected() { - return BSON("_id" << 0 << "a" << 5); - } - BSONArray expectedDependencies() { - return BSON_ARRAY("_id"); - } - BSONObj expectedBsonRepresentation() { - return BSON("a" << BSON("$const" << 5)); - } - bool expectedIsSimple() { - return false; - } -}; - -/** Project a computed expression replacing an existing field. 
*/ -class ComputedReplacement : public Computed { - virtual BSONObj source() { - return BSON("_id" << 0 << "a" << 99); - } -}; +TEST(ExpressionObjectParse, ShouldAcceptEmptyObject) { + VariablesIdGenerator idGen; + VariablesParseState vps(&idGen); + auto object = ExpressionObject::parse(BSONObj(), vps); + ASSERT_EQUALS(Value(Document{}), object->serialize(false)); +} -/** An undefined value is passed through */ -class ComputedUndefined : public ExpectedResultBase { -public: - virtual BSONObj source() { - return BSON("_id" << 0); - } - void prepareExpression() { - expression()->addField(mongo::FieldPath("a"), - ExpressionConstant::create(Value(BSONUndefined))); - } - BSONObj expected() { - return BSON("_id" << 0 << "a" << BSONUndefined); - } - BSONArray expectedDependencies() { - return BSON_ARRAY("_id"); - } - BSONObj expectedBsonRepresentation() { - return fromjson("{a:{$const:undefined}}"); - } - bool expectedIsSimple() { - return false; - } -}; +TEST(ExpressionObjectParse, ShouldAcceptLiteralsAsValues) { + VariablesIdGenerator idGen; + VariablesParseState vps(&idGen); + auto object = ExpressionObject::parse(BSON("a" << 5 << "b" + << "string" + << "c" + << BSONNULL), + vps); + auto expectedResult = + Value(Document{{"a", literal(5)}, {"b", literal("string")}, {"c", literal(BSONNULL)}}); + ASSERT_EQUALS(expectedResult, object->serialize(false)); +} -/** Project a computed expression replacing an existing field with Undefined. */ -class ComputedUndefinedReplacement : public ComputedUndefined { - virtual BSONObj source() { - return BSON("_id" << 0 << "a" << 99); - } -}; +TEST(ExpressionObjectParse, ShouldAccept_idAsFieldName) { + VariablesIdGenerator idGen; + VariablesParseState vps(&idGen); + auto object = ExpressionObject::parse(BSON("_id" << 5), vps); + auto expectedResult = Value(Document{{"_id", literal(5)}}); + ASSERT_EQUALS(expectedResult, object->serialize(false)); +} -/** A null value is projected. 
*/ -class ComputedNull : public ExpectedResultBase { -public: - virtual BSONObj source() { - return BSON("_id" << 0); - } - void prepareExpression() { - expression()->addField(mongo::FieldPath("a"), ExpressionConstant::create(Value(BSONNULL))); - } - BSONObj expected() { - return BSON("_id" << 0 << "a" << BSONNULL); - } - BSONArray expectedDependencies() { - return BSON_ARRAY("_id"); - } - BSONObj expectedBsonRepresentation() { - return BSON("a" << BSON("$const" << BSONNULL)); - } - bool expectedIsSimple() { - return false; - } -}; +TEST(ExpressionObjectParse, ShouldAcceptFieldNameContainingDollar) { + VariablesIdGenerator idGen; + VariablesParseState vps(&idGen); + auto object = ExpressionObject::parse(BSON("a$b" << 5), vps); + auto expectedResult = Value(Document{{"a$b", literal(5)}}); + ASSERT_EQUALS(expectedResult, object->serialize(false)); +} -/** A nested value is projected. */ -class ComputedNested : public ExpectedResultBase { -public: - virtual BSONObj source() { - return BSON("_id" << 0); - } - void prepareExpression() { - expression()->addField(mongo::FieldPath("a.b"), ExpressionConstant::create(Value(5))); - } - BSONObj expected() { - return BSON("_id" << 0 << "a" << BSON("b" << 5)); - } - BSONArray expectedDependencies() { - return BSON_ARRAY("_id"); - } - BSONObj expectedBsonRepresentation() { - return BSON("a" << BSON("b" << BSON("$const" << 5))); - } - bool expectedIsSimple() { - return false; - } -}; +TEST(ExpressionObjectParse, ShouldAcceptNestedObjects) { + VariablesIdGenerator idGen; + VariablesParseState vps(&idGen); + auto object = ExpressionObject::parse(fromjson("{a: {b: 1}, c: {d: {e: 1, f: 1}}}"), vps); + auto expectedResult = + Value(Document{{"a", Document{{"b", literal(1)}}}, + {"c", Document{{"d", Document{{"e", literal(1)}, {"f", literal(1)}}}}}}); + ASSERT_EQUALS(expectedResult, object->serialize(false)); +} -/** A field path is projected. 
*/ -class ComputedFieldPath : public ExpectedResultBase { -public: - virtual BSONObj source() { - return BSON("_id" << 0 << "x" << 4); - } - void prepareExpression() { - expression()->addField(mongo::FieldPath("a"), ExpressionFieldPath::create("x")); - } - BSONObj expected() { - return BSON("_id" << 0 << "a" << 4); - } - BSONArray expectedDependencies() { - return BSON_ARRAY("_id" - << "x"); - } - BSONObj expectedBsonRepresentation() { - return BSON("a" - << "$x"); - } - bool expectedIsSimple() { - return false; - } -}; +TEST(ExpressionObjectParse, ShouldAcceptArrays) { + VariablesIdGenerator idGen; + VariablesParseState vps(&idGen); + auto object = ExpressionObject::parse(fromjson("{a: [1, 2]}"), vps); + auto expectedResult = + Value(Document{{"a", vector<Value>{Value(literal(1)), Value(literal(2))}}}); + ASSERT_EQUALS(expectedResult, object->serialize(false)); +} -/** A nested field path is projected. */ -class ComputedNestedFieldPath : public ExpectedResultBase { -public: - virtual BSONObj source() { - return BSON("_id" << 0 << "x" << BSON("y" << 4)); - } - void prepareExpression() { - expression()->addField(mongo::FieldPath("a.b"), ExpressionFieldPath::create("x.y")); - } - BSONObj expected() { - return BSON("_id" << 0 << "a" << BSON("b" << 4)); - } - BSONArray expectedDependencies() { - return BSON_ARRAY("_id" - << "x.y"); - } - BSONObj expectedBsonRepresentation() { - return BSON("a" << BSON("b" - << "$x.y")); - } - bool expectedIsSimple() { - return false; - } -}; +TEST(ObjectParsing, ShouldAcceptExpressionAsValue) { + VariablesIdGenerator idGen; + VariablesParseState vps(&idGen); + auto object = ExpressionObject::parse(BSON("a" << BSON("$and" << BSONArray())), vps); + ASSERT_EQ(object->serialize(false), Value(Document{{"a", Document{{"$and", BSONArray()}}}})); +} -/** An empty subobject expression for a missing field is not projected. 
*/ -class EmptyNewSubobject : public ExpectedResultBase { -public: - virtual BSONObj source() { - return BSON("_id" << 0); - } - void prepareExpression() { - // Create a sub expression returning an empty object. - intrusive_ptr<ExpressionObject> subExpression = ExpressionObject::create(); - subExpression->addField(mongo::FieldPath("b"), ExpressionFieldPath::create("a.b")); - expression()->addField(mongo::FieldPath("a"), subExpression); - } - BSONObj expected() { - return BSON("_id" << 0); - } - BSONArray expectedDependencies() { - return BSON_ARRAY("_id" - << "a.b"); - } - BSONObj expectedBsonRepresentation() { - return fromjson("{a:{b:'$a.b'}}"); - } - bool expectedIsSimple() { - return false; - } -}; +// +// Error cases. +// -/** A non empty subobject expression for a missing field is projected. */ -class NonEmptyNewSubobject : public ExpectedResultBase { -public: - virtual BSONObj source() { - return BSON("_id" << 0); - } - void prepareExpression() { - // Create a sub expression returning an empty object. - intrusive_ptr<ExpressionObject> subExpression = ExpressionObject::create(); - subExpression->addField(mongo::FieldPath("b"), ExpressionConstant::create(Value(6))); - expression()->addField(mongo::FieldPath("a"), subExpression); - } - BSONObj expected() { - return BSON("_id" << 0 << "a" << BSON("b" << 6)); - } - BSONArray expectedDependencies() { - return BSON_ARRAY("_id"); - } - BSONObj expectedBsonRepresentation() { - return fromjson("{a:{b:{$const:6}}}"); - } - bool expectedIsSimple() { - return false; - } -}; +TEST(ExpressionObjectParse, ShouldRejectDottedFieldNames) { + VariablesIdGenerator idGen; + VariablesParseState vps(&idGen); + ASSERT_THROWS(ExpressionObject::parse(BSON("a.b" << 1), vps), UserException); + ASSERT_THROWS(ExpressionObject::parse(BSON("c" << 3 << "a.b" << 1), vps), UserException); + ASSERT_THROWS(ExpressionObject::parse(BSON("a.b" << 1 << "c" << 3), vps), UserException); +} -/** Two computed fields within a common parent. 
*/ -class AdjacentDottedComputedFields : public ExpectedResultBase { -public: - virtual BSONObj source() { - return BSON("_id" << 0); - } - void prepareExpression() { - expression()->addField(mongo::FieldPath("a.b"), ExpressionConstant::create(Value(6))); - expression()->addField(mongo::FieldPath("a.c"), ExpressionConstant::create(Value(7))); - } - BSONObj expected() { - return BSON("_id" << 0 << "a" << BSON("b" << 6 << "c" << 7)); - } - BSONArray expectedDependencies() { - return BSON_ARRAY("_id"); - } - BSONObj expectedBsonRepresentation() { - return fromjson("{a:{b:{$const:6},c:{$const:7}}}"); - } - bool expectedIsSimple() { - return false; - } -}; +TEST(ExpressionObjectParse, ShouldRejectDuplicateFieldNames) { + VariablesIdGenerator idGen; + VariablesParseState vps(&idGen); + ASSERT_THROWS(ExpressionObject::parse(BSON("a" << 1 << "a" << 1), vps), UserException); + ASSERT_THROWS(ExpressionObject::parse(BSON("a" << 1 << "b" << 2 << "a" << 1), vps), + UserException); + ASSERT_THROWS(ExpressionObject::parse(BSON("a" << BSON("c" << 1) << "b" << 2 << "a" << 1), vps), + UserException); + ASSERT_THROWS(ExpressionObject::parse(BSON("a" << 1 << "b" << 2 << "a" << BSON("c" << 1)), vps), + UserException); +} -/** Two computed fields within a common parent, in one case dotted. 
*/ -class AdjacentDottedAndNestedComputedFields : public AdjacentDottedComputedFields { - void prepareExpression() { - expression()->addField(mongo::FieldPath("a.b"), ExpressionConstant::create(Value(6))); - intrusive_ptr<ExpressionObject> subExpression = ExpressionObject::create(); - subExpression->addField(mongo::FieldPath("c"), ExpressionConstant::create(Value(7))); - expression()->addField(mongo::FieldPath("a"), subExpression); - } -}; +TEST(ExpressionObjectParse, ShouldRejectInvalidFieldName) { + VariablesIdGenerator idGen; + VariablesParseState vps(&idGen); + ASSERT_THROWS(ExpressionObject::parse(BSON("$a" << 1), vps), UserException); + ASSERT_THROWS(ExpressionObject::parse(BSON("" << 1), vps), UserException); + ASSERT_THROWS(ExpressionObject::parse(BSON(std::string("a\0b", 3) << 1), vps), UserException); +} -/** Two computed fields within a common parent, in another case dotted. */ -class AdjacentNestedAndDottedComputedFields : public AdjacentDottedComputedFields { - void prepareExpression() { - intrusive_ptr<ExpressionObject> subExpression = ExpressionObject::create(); - subExpression->addField(mongo::FieldPath("b"), ExpressionConstant::create(Value(6))); - expression()->addField(mongo::FieldPath("a"), subExpression); - expression()->addField(mongo::FieldPath("a.c"), ExpressionConstant::create(Value(7))); - } -}; +TEST(ExpressionObjectParse, ShouldRejectInvalidFieldPathAsValue) { + VariablesIdGenerator idGen; + VariablesParseState vps(&idGen); + ASSERT_THROWS(ExpressionObject::parse(BSON("a" + << "$field."), + vps), + UserException); +} -/** Two computed fields within a common parent, nested rather than dotted. 
*/ -class AdjacentNestedComputedFields : public AdjacentDottedComputedFields { - void prepareExpression() { - intrusive_ptr<ExpressionObject> firstSubExpression = ExpressionObject::create(); - firstSubExpression->addField(mongo::FieldPath("b"), ExpressionConstant::create(Value(6))); - expression()->addField(mongo::FieldPath("a"), firstSubExpression); - intrusive_ptr<ExpressionObject> secondSubExpression = ExpressionObject::create(); - secondSubExpression->addField(mongo::FieldPath("c"), ExpressionConstant::create(Value(7))); - expression()->addField(mongo::FieldPath("a"), secondSubExpression); - } -}; +TEST(ParseObject, ShouldRejectExpressionAsTheSecondField) { + VariablesIdGenerator idGen; + VariablesParseState vps(&idGen); + ASSERT_THROWS(ExpressionObject::parse( + BSON("a" << BSON("$and" << BSONArray()) << "$or" << BSONArray()), vps), + UserException); +} -/** Field ordering is preserved when nested fields are merged. */ -class AdjacentNestedOrdering : public ExpectedResultBase { -public: - virtual BSONObj source() { - return BSON("_id" << 0); - } - void prepareExpression() { - expression()->addField(mongo::FieldPath("a.b"), ExpressionConstant::create(Value(6))); - intrusive_ptr<ExpressionObject> subExpression = ExpressionObject::create(); - // Add field 'd' then 'c'. Expect the same field ordering in the result - // doc. - subExpression->addField(mongo::FieldPath("d"), ExpressionConstant::create(Value(7))); - subExpression->addField(mongo::FieldPath("c"), ExpressionConstant::create(Value(8))); - expression()->addField(mongo::FieldPath("a"), subExpression); - } - BSONObj expected() { - return BSON("_id" << 0 << "a" << BSON("b" << 6 << "d" << 7 << "c" << 8)); - } - BSONArray expectedDependencies() { - return BSON_ARRAY("_id"); - } - BSONObj expectedBsonRepresentation() { - return fromjson("{a:{b:{$const:6},d:{$const:7},c:{$const:8}}}"); - } - bool expectedIsSimple() { - return false; - } -}; +// +// Evaluation. +// -/** Adjacent fields two levels deep. 
*/ -class MultipleNestedFields : public ExpectedResultBase { -public: - virtual BSONObj source() { - return BSON("_id" << 0); - } - void prepareExpression() { - expression()->addField(mongo::FieldPath("a.b.c"), ExpressionConstant::create(Value(6))); - intrusive_ptr<ExpressionObject> bSubExpression = ExpressionObject::create(); - bSubExpression->addField(mongo::FieldPath("d"), ExpressionConstant::create(Value(7))); - intrusive_ptr<ExpressionObject> aSubExpression = ExpressionObject::create(); - aSubExpression->addField(mongo::FieldPath("b"), bSubExpression); - expression()->addField(mongo::FieldPath("a"), aSubExpression); - } - BSONObj expected() { - return BSON("_id" << 0 << "a" << BSON("b" << BSON("c" << 6 << "d" << 7))); - } - BSONArray expectedDependencies() { - return BSON_ARRAY("_id"); - } - BSONObj expectedBsonRepresentation() { - return fromjson("{a:{b:{c:{$const:6},d:{$const:7}}}}"); - } - bool expectedIsSimple() { - return false; - } -}; +TEST(ExpressionObjectEvaluate, EmptyObjectShouldEvaluateToEmptyDocument) { + auto object = ExpressionObject::create({}); + ASSERT_EQUALS(Value(Document()), object->evaluate(Document())); + ASSERT_EQUALS(Value(Document()), object->evaluate(Document{{"a", 1}})); + ASSERT_EQUALS(Value(Document()), object->evaluate(Document{{"_id", "ID"}})); +} -/** Two expressions cannot generate the same field. */ -class ConflictingExpressionFields : public Base { -public: - void run() { - intrusive_ptr<ExpressionObject> expression = ExpressionObject::createRoot(); - expression->addField(mongo::FieldPath("a"), ExpressionConstant::create(Value(5))); - ASSERT_THROWS(expression->addField(mongo::FieldPath("a"), // Duplicate field. 
- ExpressionConstant::create(Value(6))), - UserException); - } -}; +TEST(ExpressionObjectEvaluate, ShouldEvaluateEachField) { + auto object = ExpressionObject::create({{"a", makeConstant(1)}, {"b", makeConstant(5)}}); + ASSERT_EQUALS(Value(Document{{"a", 1}, {"b", 5}}), object->evaluate(Document())); + ASSERT_EQUALS(Value(Document{{"a", 1}, {"b", 5}}), object->evaluate(Document{{"a", 1}})); + ASSERT_EQUALS(Value(Document{{"a", 1}, {"b", 5}}), object->evaluate(Document{{"_id", "ID"}})); +} -/** An expression field conflicts with an inclusion field. */ -class ConflictingInclusionExpressionFields : public Base { -public: - void run() { - intrusive_ptr<ExpressionObject> expression = ExpressionObject::createRoot(); - expression->includePath("a"); - ASSERT_THROWS( - expression->addField(mongo::FieldPath("a"), ExpressionConstant::create(Value(6))), - UserException); - } -}; +TEST(ExpressionObjectEvaluate, OrderOfFieldsInOutputShouldMatchOrderInSpecification) { + auto object = ExpressionObject::create({{"a", ExpressionFieldPath::create("a")}, + {"b", ExpressionFieldPath::create("b")}, + {"c", ExpressionFieldPath::create("c")}}); + ASSERT_EQUALS(Value(Document{{"a", "A"}, {"b", "B"}, {"c", "C"}}), + object->evaluate(Document{{"c", "C"}, {"a", "A"}, {"b", "B"}, {"_id", "ID"}})); +} -/** An inclusion field conflicts with an expression field. 
*/ -class ConflictingExpressionInclusionFields : public Base { -public: - void run() { - intrusive_ptr<ExpressionObject> expression = ExpressionObject::createRoot(); - expression->addField(mongo::FieldPath("a"), ExpressionConstant::create(Value(5))); - ASSERT_THROWS(expression->includePath("a"), UserException); - } -}; +TEST(ExpressionObjectEvaluate, ShouldRemoveFieldsThatHaveMissingValues) { + auto object = ExpressionObject::create( + {{"a", ExpressionFieldPath::create("a.b")}, {"b", ExpressionFieldPath::create("missing")}}); + ASSERT_EQUALS(Value(Document{}), object->evaluate(Document())); + ASSERT_EQUALS(Value(Document{}), object->evaluate(Document{{"a", 1}})); +} -/** An object expression conflicts with a constant expression. */ -class ConflictingObjectConstantExpressionFields : public Base { -public: - void run() { - intrusive_ptr<ExpressionObject> expression = ExpressionObject::createRoot(); - intrusive_ptr<ExpressionObject> subExpression = ExpressionObject::create(); - subExpression->includePath("b"); - expression->addField(mongo::FieldPath("a"), subExpression); - ASSERT_THROWS( - expression->addField(mongo::FieldPath("a.b"), ExpressionConstant::create(Value(6))), - UserException); - } -}; +TEST(ExpressionObjectEvaluate, ShouldEvaluateFieldsWithinNestedObject) { + auto object = ExpressionObject::create( + {{"a", + ExpressionObject::create( + {{"b", makeConstant(1)}, {"c", ExpressionFieldPath::create("_id")}})}}); + ASSERT_EQUALS(Value(Document{{"a", Document{{"b", 1}}}}), object->evaluate(Document())); + ASSERT_EQUALS(Value(Document{{"a", Document{{"b", 1}, {"c", "ID"}}}}), + object->evaluate(Document{{"_id", "ID"}})); +} -/** A constant expression conflicts with an object expression. 
*/ -class ConflictingConstantObjectExpressionFields : public Base { -public: - void run() { - intrusive_ptr<ExpressionObject> expression = ExpressionObject::createRoot(); - expression->addField(mongo::FieldPath("a.b"), ExpressionConstant::create(Value(6))); - intrusive_ptr<ExpressionObject> subExpression = ExpressionObject::create(); - subExpression->includePath("b"); - ASSERT_THROWS(expression->addField(mongo::FieldPath("a"), subExpression), UserException); - } -}; +TEST(ExpressionObjectEvaluate, ShouldEvaluateToEmptyDocumentIfAllFieldsAreMissing) { + auto object = ExpressionObject::create({{"a", ExpressionFieldPath::create("missing")}}); + ASSERT_EQUALS(Value(Document{}), object->evaluate(Document())); -/** Two nested expressions cannot generate the same field. */ -class ConflictingNestedFields : public Base { -public: - void run() { - intrusive_ptr<ExpressionObject> expression = ExpressionObject::createRoot(); - expression->addField(mongo::FieldPath("a.b"), ExpressionConstant::create(Value(5))); - ASSERT_THROWS(expression->addField(mongo::FieldPath("a.b"), // Duplicate field. - ExpressionConstant::create(Value(6))), - UserException); - } -}; + auto objectWithNestedObject = ExpressionObject::create({{"nested", object}}); + ASSERT_EQUALS(Value(Document{{"nested", Document{}}}), + objectWithNestedObject->evaluate(Document())); +} -/** An expression cannot be created for a subfield of another expression. */ -class ConflictingFieldAndSubfield : public Base { -public: - void run() { - intrusive_ptr<ExpressionObject> expression = ExpressionObject::createRoot(); - expression->addField(mongo::FieldPath("a"), ExpressionConstant::create(Value(5))); - ASSERT_THROWS( - expression->addField(mongo::FieldPath("a.b"), ExpressionConstant::create(Value(5))), - UserException); - } -}; +// +// Dependencies. +// -/** An expression cannot be created for a nested field of another expression. 
*/ -class ConflictingFieldAndNestedField : public Base { -public: - void run() { - intrusive_ptr<ExpressionObject> expression = ExpressionObject::createRoot(); - expression->addField(mongo::FieldPath("a"), ExpressionConstant::create(Value(5))); - intrusive_ptr<ExpressionObject> subExpression = ExpressionObject::create(); - subExpression->addField(mongo::FieldPath("b"), ExpressionConstant::create(Value(5))); - ASSERT_THROWS(expression->addField(mongo::FieldPath("a"), subExpression), UserException); - } -}; +TEST(ExpressionObjectDependencies, ConstantValuesShouldNotBeAddedToDependencies) { + auto object = ExpressionObject::create({{"a", makeConstant(5)}}); + DepsTracker deps; + object->addDependencies(&deps); + ASSERT_EQ(deps.fields.size(), 0UL); +} -/** An expression cannot be created for a parent field of another expression. */ -class ConflictingSubfieldAndField : public Base { -public: - void run() { - intrusive_ptr<ExpressionObject> expression = ExpressionObject::createRoot(); - expression->addField(mongo::FieldPath("a.b"), ExpressionConstant::create(Value(5))); - ASSERT_THROWS( - expression->addField(mongo::FieldPath("a"), ExpressionConstant::create(Value(5))), - UserException); - } +TEST(ExpressionObjectDependencies, FieldPathsShouldBeAddedToDependencies) { + auto object = ExpressionObject::create({{"x", ExpressionFieldPath::create("c.d")}}); + DepsTracker deps; + object->addDependencies(&deps); + ASSERT_EQ(deps.fields.size(), 1UL); + ASSERT_EQ(deps.fields.count("c.d"), 1UL); }; -/** An expression cannot be created for a parent of a nested field. 
*/ -class ConflictingNestedFieldAndField : public Base { -public: - void run() { - intrusive_ptr<ExpressionObject> expression = ExpressionObject::createRoot(); - intrusive_ptr<ExpressionObject> subExpression = ExpressionObject::create(); - subExpression->addField(mongo::FieldPath("b"), ExpressionConstant::create(Value(5))); - expression->addField(mongo::FieldPath("a"), subExpression); - ASSERT_THROWS( - expression->addField(mongo::FieldPath("a"), ExpressionConstant::create(Value(5))), - UserException); - } -}; +// +// Optimizations. +// -/** Dependencies for non inclusion expressions. */ -class NonInclusionDependencies : public Base { -public: - void run() { - intrusive_ptr<ExpressionObject> expression = ExpressionObject::createRoot(); - expression->addField(mongo::FieldPath("a"), ExpressionConstant::create(Value(5))); - assertDependencies(BSON_ARRAY("_id"), expression, true); - assertDependencies(BSONArray(), expression, false); - expression->addField(mongo::FieldPath("b"), ExpressionFieldPath::create("c.d")); - assertDependencies(BSON_ARRAY("_id" - << "c.d"), - expression, - true); - assertDependencies(BSON_ARRAY("c.d"), expression, false); - } -}; +TEST(ExpressionObjectOptimizations, OptimizingAnObjectShouldOptimizeSubExpressions) { + // Build up the object {a: {$add: [1, 2]}}. + VariablesIdGenerator idGen; + VariablesParseState vps(&idGen); + auto addExpression = + ExpressionAdd::parse(BSON("$add" << BSON_ARRAY(1 << 2)).firstElement(), vps); + auto object = ExpressionObject::create({{"a", addExpression}}); + ASSERT_EQ(object->getChildExpressions().size(), 1UL); -/** Dependencies for inclusion expressions. */ -class InclusionDependencies : public Base { -public: - void run() { - intrusive_ptr<ExpressionObject> expression = ExpressionObject::createRoot(); - expression->includePath("a"); - assertDependencies(BSON_ARRAY("_id" - << "a"), - expression, - true); - DepsTracker unused; - // 'path' must be provided for inclusion expressions. 
- ASSERT_THROWS(expression->addDependencies(&unused), UserException); - } -}; + auto optimized = object->optimize(); + auto optimizedObject = dynamic_cast<ExpressionObject*>(optimized.get()); + ASSERT_TRUE(optimizedObject); + ASSERT_EQ(optimizedObject->getChildExpressions().size(), 1UL); -/** Optimizing an object expression optimizes its sub expressions. */ -class Optimize : public Base { -public: - void run() { - intrusive_ptr<ExpressionObject> expression = ExpressionObject::createRoot(); - // Add inclusion. - expression->includePath("a"); - // Add non inclusion. - intrusive_ptr<Expression> andExpr = new ExpressionAnd(); - expression->addField(mongo::FieldPath("b"), andExpr); - expression->optimize(); - // Optimizing 'expression' optimizes its non inclusion sub expressions, - // while - // inclusion sub expressions are passed through. - ASSERT_EQUALS(BSON("a" << true << "b" << BSON("$const" << true)), - expressionToBson(expression)); - } -}; - -/** Serialize to a BSONObj. */ -class AddToBsonObj : public Base { -public: - void run() { - intrusive_ptr<ExpressionObject> expression = ExpressionObject::createRoot(); - expression->addField(mongo::FieldPath("a"), ExpressionConstant::create(Value(5))); - ASSERT_EQUALS(constify(BSON("foo" << BSON("a" << 5))), - BSON("foo" << expression->serialize(false))); - } -}; - -/** Serialize to a BSONObj, with constants represented by expressions. */ -class AddToBsonObjRequireExpression : public Base { -public: - void run() { - intrusive_ptr<ExpressionObject> expression = ExpressionObject::createRoot(); - expression->addField(mongo::FieldPath("a"), ExpressionConstant::create(Value(5))); - ASSERT_EQUALS(BSON("foo" << BSON("a" << BSON("$const" << 5))), - BSON("foo" << expression->serialize(false))); - } -}; - -/** Serialize to a BSONArray. 
*/ -class AddToBsonArray : public Base { -public: - void run() { - intrusive_ptr<ExpressionObject> expression = ExpressionObject::createRoot(); - expression->addField(mongo::FieldPath("a"), ExpressionConstant::create(Value(5))); - BSONArrayBuilder bab; - bab << expression->serialize(false); - ASSERT_EQUALS(constify(BSON_ARRAY(BSON("a" << 5))), bab.arr()); - } -}; - -/** - * evaluate() does not supply an inclusion document. Inclusion spec'd fields - * are not - * included. (Inclusion specs are not generally expected/allowed in cases where - * evaluate - * is called instead of addToDocument.) - */ -class Evaluate : public Base { -public: - void run() { - intrusive_ptr<ExpressionObject> expression = ExpressionObject::createRoot(); - expression->includePath("a"); - expression->addField(mongo::FieldPath("b"), ExpressionConstant::create(Value(5))); - expression->addField(mongo::FieldPath("c"), ExpressionFieldPath::create("a")); - ASSERT_EQUALS( - BSON("b" << 5 << "c" << 1), - toBson(expression->evaluate(fromBson(BSON("_id" << 0 << "a" << 1))).getDocument())); - } + // We should have optimized {$add: [1, 2]} to just the constant 3. 
+ auto expConstant = + dynamic_cast<ExpressionConstant*>(optimizedObject->getChildExpressions()[0].second.get()); + ASSERT_TRUE(expConstant); + ASSERT_EQ(expConstant->evaluate(Document()), Value(3)); }; } // namespace Object @@ -3499,263 +2850,33 @@ namespace Parse { namespace Object { -class Base { -public: - virtual ~Base() {} - void run() { - BSONObj specObject = BSON("" << spec()); - BSONElement specElement = specObject.firstElement(); - Expression::ObjectCtx context = objectCtx(); - VariablesIdGenerator idGenerator; - VariablesParseState vps(&idGenerator); - intrusive_ptr<Expression> expression = - Expression::parseObject(specElement.Obj(), &context, vps); - ASSERT_EQUALS(expectedBson(), expressionToBson(expression)); - } - -protected: - virtual BSONObj spec() = 0; - virtual Expression::ObjectCtx objectCtx() { - return Expression::ObjectCtx(Expression::ObjectCtx::DOCUMENT_OK); - } - virtual BSONObj expectedBson() { - return constify(spec()); - } -}; - -class ParseError { -public: - virtual ~ParseError() {} - void run() { - BSONObj specObject = BSON("" << spec()); - BSONElement specElement = specObject.firstElement(); - Expression::ObjectCtx context = objectCtx(); - VariablesIdGenerator idGenerator; - VariablesParseState vps(&idGenerator); - ASSERT_THROWS(Expression::parseObject(specElement.Obj(), &context, vps), UserException); - } - -protected: - virtual BSONObj spec() = 0; - virtual Expression::ObjectCtx objectCtx() { - return Expression::ObjectCtx(Expression::ObjectCtx::DOCUMENT_OK); - } -}; - -/** The spec must be an object. */ -class NonObject { -public: - void run() { - BSONObj specObject = BSON("" << 1); - BSONElement specElement = specObject.firstElement(); - Expression::ObjectCtx context = Expression::ObjectCtx(Expression::ObjectCtx::DOCUMENT_OK); - VariablesIdGenerator idGenerator; - VariablesParseState vps(&idGenerator); - ASSERT_THROWS(Expression::parseObject(specElement.Obj(), &context, vps), UserException); - } -}; - -/** Empty object. 
*/ -class Empty : public Base { - BSONObj spec() { - return BSONObj(); - } -}; - -/** Operator spec object. */ -class Operator : public Base { - BSONObj spec() { - return BSON("$and" << BSONArray()); - } -}; - -/** Invalid operator not allowed. */ -class InvalidOperator : public ParseError { - BSONObj spec() { - return BSON("$invalid" << 1); - } -}; - -/** Two operators not allowed. */ -class TwoOperators : public ParseError { - BSONObj spec() { - return BSON("$and" << BSONArray() << "$or" << BSONArray()); - } -}; - -/** An operator must be the first and only field. */ -class OperatorLaterField : public ParseError { - BSONObj spec() { - return BSON("a" << BSON("$and" << BSONArray()) << "$or" << BSONArray()); - } -}; - -/** An operator must be the first and only field. */ -class OperatorAndOtherField : public ParseError { - BSONObj spec() { - return BSON("$and" << BSONArray() << "a" << BSON("$or" << BSONArray())); - } -}; - -/** Operators not allowed at the top level of a projection. */ -class OperatorTopLevel : public ParseError { - BSONObj spec() { - return BSON("$and" << BSONArray()); - } - Expression::ObjectCtx objectCtx() { - return Expression::ObjectCtx(Expression::ObjectCtx::DOCUMENT_OK | - Expression::ObjectCtx::TOP_LEVEL); - } -}; - -/** Dotted fields are not generally allowed. */ -class Dotted : public ParseError { - BSONObj spec() { - return BSON("a.b" << BSON("$and" << BSONArray())); - } -}; - -/** Dotted fields are allowed at the top level. */ -class DottedTopLevel : public Base { - BSONObj spec() { - return BSON("a.b" << BSON("$and" << BSONArray())); - } - Expression::ObjectCtx objectCtx() { - return Expression::ObjectCtx(Expression::ObjectCtx::DOCUMENT_OK | - Expression::ObjectCtx::TOP_LEVEL); - } - BSONObj expectedBson() { - return BSON("a" << BSON("b" << BSON("$and" << BSONArray()))); - } -}; - -/** Nested spec. 
*/ -class Nested : public Base { - BSONObj spec() { - return BSON("a" << BSON("$and" << BSONArray())); - } -}; - -/** Parse error in nested document. */ -class NestedParseError : public ParseError { - BSONObj spec() { - return BSON("a" << BSON("$and" << BSONArray() << "$or" << BSONArray())); - } -}; - -/** FieldPath expression. */ -class FieldPath : public Base { - BSONObj spec() { - return BSON("a" - << "$field"); - } -}; - -/** Invalid FieldPath expression. */ -class InvalidFieldPath : public ParseError { - BSONObj spec() { - return BSON("a" - << "$field."); - } -}; - -/** Non FieldPath string. */ -class NonFieldPathString : public ParseError { - BSONObj spec() { - return BSON("a" - << "foo"); - } -}; - -/** Inclusion spec not allowed. */ -class DisallowedInclusion : public ParseError { - BSONObj spec() { - return BSON("a" << 1); - } -}; - -class InclusionBase : public Base { - Expression::ObjectCtx objectCtx() { - return Expression::ObjectCtx(Expression::ObjectCtx::DOCUMENT_OK | - Expression::ObjectCtx::INCLUSION_OK); - } - BSONObj expectedBson() { - return BSON("a" << true); - } -}; - -/** Inclusion with bool type. */ -class InclusionBool : public InclusionBase { - BSONObj spec() { - return BSON("a" << true); - } -}; - -/** Inclusion with double type. */ -class InclusionDouble : public InclusionBase { - BSONObj spec() { - return BSON("a" << 1.0); - } -}; +/** + * Parses the object given by 'specification', with the options given by 'parseContextOptions'. + */ +boost::intrusive_ptr<Expression> parseObject(BSONObj specification) { + VariablesIdGenerator idGenerator; + VariablesParseState vps(&idGenerator); -/** Inclusion with int type. */ -class InclusionInt : public InclusionBase { - BSONObj spec() { - return BSON("a" << 1); - } + return Expression::parseObject(specification, vps); }; -/** Inclusion with long type. 
*/ -class InclusionLong : public InclusionBase { - BSONObj spec() { - return BSON("a" << 1LL); - } -}; +TEST(ParseObject, ShouldAcceptEmptyObject) { + auto resultExpression = parseObject(BSONObj()); -/** Inclusion of a nested field. */ -class NestedInclusion : public InclusionBase { - BSONObj spec() { - return BSON("a" << BSON("b" << true)); - } - BSONObj expectedBson() { - return spec(); - } -}; + // Should return an empty ExpressionObject. + auto resultObject = dynamic_cast<ExpressionObject*>(resultExpression.get()); + ASSERT_TRUE(resultObject); -/** Exclude _id. */ -class ExcludeId : public Base { - BSONObj spec() { - return BSON("_id" << 0); - } - Expression::ObjectCtx objectCtx() { - return Expression::ObjectCtx(Expression::ObjectCtx::DOCUMENT_OK | - Expression::ObjectCtx::TOP_LEVEL); - } - BSONObj expectedBson() { - return BSON("_id" << false); - } -}; + ASSERT_EQ(resultObject->getChildExpressions().size(), 0UL); +} -/** Excluding non _id field not allowed. */ -class ExcludeNonId : public ParseError { - BSONObj spec() { - return BSON("a" << 0); - } -}; +TEST(ParseObject, ShouldRecognizeKnownExpression) { + auto resultExpression = parseObject(BSON("$and" << BSONArray())); -/** Excluding _id not top level. */ -class ExcludeIdNotTopLevel : public ParseError { - BSONObj spec() { - return BSON("_id" << 0); - } -}; - -/** Invalid value type. */ -class InvalidType : public ParseError { - BSONObj spec() { - return BSON("a" << BSONNULL); - } -}; + // Should return an ExpressionAnd. 
+ auto resultAnd = dynamic_cast<ExpressionAnd*>(resultExpression.get()); + ASSERT_TRUE(resultAnd); +} } // namespace Object @@ -3763,230 +2884,160 @@ namespace Expression { using mongo::Expression; -class Base { -public: - virtual ~Base() {} - void run() { - BSONObj specObject = spec(); - BSONElement specElement = specObject.firstElement(); - VariablesIdGenerator idGenerator; - VariablesParseState vps(&idGenerator); - intrusive_ptr<Expression> expression = Expression::parseExpression(specElement, vps); - ASSERT_EQUALS(constify(expectedBson()), expressionToBson(expression)); - } - -protected: - virtual BSONObj spec() = 0; - virtual BSONObj expectedBson() { - return constify(spec()); - } -}; - -class ParseError { -public: - virtual ~ParseError() {} - void run() { - BSONObj specObject = spec(); - BSONElement specElement = specObject.firstElement(); - VariablesIdGenerator idGenerator; - VariablesParseState vps(&idGenerator); - ASSERT_THROWS(Expression::parseExpression(specElement, vps), UserException); - } +/** + * Parses an expression from the given BSON specification. + */ +boost::intrusive_ptr<Expression> parseExpression(BSONObj specification) { + VariablesIdGenerator idGenerator; + VariablesParseState vps(&idGenerator); + return Expression::parseExpression(specification, vps); +} -protected: - virtual BSONObj spec() = 0; -}; +TEST(ParseExpression, ShouldRecognizeConstExpression) { + auto resultExpression = parseExpression(BSON("$const" << 5)); + auto constExpression = dynamic_cast<ExpressionConstant*>(resultExpression.get()); + ASSERT_TRUE(constExpression); + ASSERT_EQUALS(constExpression->serialize(false), Value(Document{{"$const", 5}})); +} -/** A constant expression. */ -class Const : public Base { - BSONObj spec() { - return BSON("$const" << 5); - } -}; +TEST(ParseExpression, ShouldRejectUnknownExpression) { + ASSERT_THROWS(parseExpression(BSON("$invalid" << 1)), UserException); +} -/** An expression with an invalid name. 
*/ -class InvalidName : public ParseError { - BSONObj spec() { - return BSON("$invalid" << 1); - } -}; +TEST(ParseExpression, ShouldRejectExpressionArgumentsWhichAreNotInArray) { + ASSERT_THROWS(parseExpression(BSON("$strcasecmp" + << "foo")), + UserException); +} -/** An expression requiring an array that is not provided with an array. */ -class RequiredArrayMissing : public ParseError { - BSONObj spec() { - return BSON("$strcasecmp" - << "foo"); - } -}; +TEST(ParseExpression, ShouldRejectExpressionWithWrongNumberOfArguments) { + ASSERT_THROWS(parseExpression(BSON("$strcasecmp" << BSON_ARRAY("foo"))), UserException); +} -/** An expression with the wrong number of operands. */ -class IncorrectOperandCount : public ParseError { - BSONObj spec() { - return BSON("$strcasecmp" << BSON_ARRAY("foo")); - } -}; +TEST(ParseExpression, ShouldRejectObjectWithTwoTopLevelExpressions) { + ASSERT_THROWS(parseExpression(BSON("$and" << BSONArray() << "$or" << BSONArray())), + UserException); +} -/** An expression with the correct number of operands. */ -class CorrectOperandCount : public Base { - BSONObj spec() { - return BSON("$strcasecmp" << BSON_ARRAY("foo" - << "FOO")); - } -}; +TEST(ParseExpression, ShouldRejectExpressionIfItsNotTheOnlyField) { + ASSERT_THROWS(parseExpression(BSON("$and" << BSONArray() << "a" << BSON("$or" << BSONArray()))), + UserException); +} -/** An variable argument expression with zero operands. 
*/ -class ZeroOperands : public Base { - BSONObj spec() { - return BSON("$and" << BSONArray()); - } -}; +TEST(ParseExpression, ShouldParseExpressionWithMultipleArguments) { + auto resultExpression = parseExpression(BSON("$strcasecmp" << BSON_ARRAY("foo" + << "FOO"))); + auto strCaseCmpExpression = dynamic_cast<ExpressionStrcasecmp*>(resultExpression.get()); + ASSERT_TRUE(strCaseCmpExpression); + vector<Value> arguments = {Value(Document{{"$const", "foo"}}), + Value(Document{{"$const", "FOO"}})}; + ASSERT_EQUALS(strCaseCmpExpression->serialize(false), + Value(Document{{"$strcasecmp", arguments}})); +} -/** An variable argument expression with one operand. */ -class OneOperand : public Base { - BSONObj spec() { - return BSON("$and" << BSON_ARRAY(1)); - } -}; +TEST(ParseExpression, ShouldParseExpressionWithNoArguments) { + auto resultExpression = parseExpression(BSON("$and" << BSONArray())); + auto andExpression = dynamic_cast<ExpressionAnd*>(resultExpression.get()); + ASSERT_TRUE(andExpression); + ASSERT_EQUALS(andExpression->serialize(false), Value(Document{{"$and", vector<Value>{}}})); +} -/** An variable argument expression with two operands. */ -class TwoOperands : public Base { - BSONObj spec() { - return BSON("$and" << BSON_ARRAY(1 << 2)); - } -}; +TEST(ParseExpression, ShouldParseExpressionWithOneArgument) { + auto resultExpression = parseExpression(BSON("$and" << BSON_ARRAY(1))); + auto andExpression = dynamic_cast<ExpressionAnd*>(resultExpression.get()); + ASSERT_TRUE(andExpression); + vector<Value> arguments = {Value(Document{{"$const", 1}})}; + ASSERT_EQUALS(andExpression->serialize(false), Value(Document{{"$and", arguments}})); +} -/** An variable argument expression with a singleton operand. 
*/ -class SingletonOperandVariable : public Base { - BSONObj spec() { - return BSON("$and" << 1); - } - BSONObj expectedBson() { - return BSON("$and" << BSON_ARRAY(1)); - } -}; +TEST(ParseExpression, ShouldAcceptArgumentWithoutArrayForVariadicExpressions) { + auto resultExpression = parseExpression(BSON("$and" << 1)); + auto andExpression = dynamic_cast<ExpressionAnd*>(resultExpression.get()); + ASSERT_TRUE(andExpression); + vector<Value> arguments = {Value(Document{{"$const", 1}})}; + ASSERT_EQUALS(andExpression->serialize(false), Value(Document{{"$and", arguments}})); +} -/** An fixed argument expression with a singleton operand. */ -class SingletonOperandFixed : public Base { - BSONObj spec() { - return BSON("$not" << 1); - } - BSONObj expectedBson() { - return BSON("$not" << BSON_ARRAY(1)); - } -}; +TEST(ParseExpression, ShouldAcceptArgumentWithoutArrayAsSingleArgument) { + auto resultExpression = parseExpression(BSON("$not" << 1)); + auto notExpression = dynamic_cast<ExpressionNot*>(resultExpression.get()); + ASSERT_TRUE(notExpression); + vector<Value> arguments = {Value(Document{{"$const", 1}})}; + ASSERT_EQUALS(notExpression->serialize(false), Value(Document{{"$not", arguments}})); +} -/** An object can be provided as a singleton argument. */ -class ObjectSingleton : public Base { - BSONObj spec() { - return BSON("$and" << BSON("$const" << 1)); - } - BSONObj expectedBson() { - return BSON("$and" << BSON_ARRAY(BSON("$const" << 1))); - } -}; +TEST(ParseExpression, ShouldAcceptObjectAsSingleArgument) { + auto resultExpression = parseExpression(BSON("$and" << BSON("$const" << 1))); + auto andExpression = dynamic_cast<ExpressionAnd*>(resultExpression.get()); + ASSERT_TRUE(andExpression); + vector<Value> arguments = {Value(Document{{"$const", 1}})}; + ASSERT_EQUALS(andExpression->serialize(false), Value(Document{{"$and", arguments}})); +} -/** An object can be provided as an array agrument. 
*/ -class ObjectOperand : public Base { - BSONObj spec() { - return BSON("$and" << BSON_ARRAY(BSON("$const" << 1))); - } - BSONObj expectedBson() { - return BSON("$and" << BSON_ARRAY(1)); - } -}; +TEST(ParseExpression, ShouldAcceptObjectInsideArrayAsSingleArgument) { + auto resultExpression = parseExpression(BSON("$and" << BSON_ARRAY(BSON("$const" << 1)))); + auto andExpression = dynamic_cast<ExpressionAnd*>(resultExpression.get()); + ASSERT_TRUE(andExpression); + vector<Value> arguments = {Value(Document{{"$const", 1}})}; + ASSERT_EQUALS(andExpression->serialize(false), Value(Document{{"$and", arguments}})); +} } // namespace Expression namespace Operand { -class Base { -public: - virtual ~Base() {} - void run() { - BSONObj specObject = spec(); - BSONElement specElement = specObject.firstElement(); - VariablesIdGenerator idGenerator; - VariablesParseState vps(&idGenerator); - intrusive_ptr<mongo::Expression> expression = - mongo::Expression::parseOperand(specElement, vps); - ASSERT_EQUALS(expectedBson(), expressionToBson(expression)); - } - -protected: - virtual BSONObj spec() = 0; - virtual BSONObj expectedBson() { - return constify(spec()); - } -}; - -class ParseError { -public: - virtual ~ParseError() {} - void run() { - BSONObj specObject = spec(); - BSONElement specElement = specObject.firstElement(); - VariablesIdGenerator idGenerator; - VariablesParseState vps(&idGenerator); - ASSERT_THROWS(mongo::Expression::parseOperand(specElement, vps), UserException); - } +using mongo::Expression; -protected: - virtual BSONObj spec() = 0; -}; +/** + * Parses an operand from the given BSON specification. The field name is ignored, since it is + * assumed to have come from an array, or to have been the only argument to an expression, in which + * case the field name would be the name of the expression. 
+ */ +intrusive_ptr<Expression> parseOperand(BSONObj specification) { + BSONElement specElement = specification.firstElement(); + VariablesIdGenerator idGenerator; + VariablesParseState vps(&idGenerator); + return Expression::parseOperand(specElement, vps); +} -/** A field path operand. */ -class FieldPath { -public: - void run() { - BSONObj specObject = BSON("" - << "$field"); - BSONElement specElement = specObject.firstElement(); - VariablesIdGenerator idGenerator; - VariablesParseState vps(&idGenerator); - intrusive_ptr<mongo::Expression> expression = - mongo::Expression::parseOperand(specElement, vps); - ASSERT_EQUALS(specObject, BSON("" << expression->serialize(false))); - } -}; +TEST(ParseOperand, ShouldRecognizeFieldPath) { + auto resultExpression = parseOperand(BSON("" + << "$field")); + auto fieldPathExpression = dynamic_cast<ExpressionFieldPath*>(resultExpression.get()); + ASSERT_TRUE(fieldPathExpression); + ASSERT_EQ(fieldPathExpression->serialize(false), Value("$field")); +} -/** A string constant (not field path) operand. */ -class NonFieldPathString : public Base { - BSONObj spec() { - return BSON("" - << "foo"); - } - BSONObj expectedBson() { - return BSON("$const" - << "foo"); - } -}; +TEST(ParseOperand, ShouldRecognizeStringLiteral) { + auto resultExpression = parseOperand(BSON("" + << "foo")); + auto constantExpression = dynamic_cast<ExpressionConstant*>(resultExpression.get()); + ASSERT_TRUE(constantExpression); + ASSERT_EQ(constantExpression->serialize(false), Value(Document{{"$const", "foo"}})); +} -/** An object operand. 
*/ -class Object : public Base { - BSONObj spec() { - return BSON("" << BSON("$and" << BSONArray())); - } - BSONObj expectedBson() { - return BSON("$and" << BSONArray()); - } -}; +TEST(ParseOperand, ShouldRecognizeNestedArray) { + auto resultExpression = parseOperand(BSON("" << BSON_ARRAY("foo" + << "$field"))); + auto arrayExpression = dynamic_cast<ExpressionArray*>(resultExpression.get()); + ASSERT_TRUE(arrayExpression); + vector<Value> expectedSerializedArray = {Value(Document{{"$const", "foo"}}), Value("$field")}; + ASSERT_EQ(arrayExpression->serialize(false), Value(expectedSerializedArray)); +} -/** An inclusion operand. */ -class InclusionObject : public ParseError { - BSONObj spec() { - return BSON("" << BSON("a" << 1)); - } -}; +TEST(ParseOperand, ShouldRecognizeNumberLiteral) { + auto resultExpression = parseOperand(BSON("" << 5)); + auto constantExpression = dynamic_cast<ExpressionConstant*>(resultExpression.get()); + ASSERT_TRUE(constantExpression); + ASSERT_EQ(constantExpression->serialize(false), Value(Document{{"$const", 5}})); +} -/** A constant operand. 
*/ -class Constant : public Base { - BSONObj spec() { - return BSON("" << 5); - } - BSONObj expectedBson() { - return BSON("$const" << 5); - } -}; +TEST(ParseOperand, ShouldRecognizeNestedExpression) { + auto resultExpression = parseOperand(BSON("" << BSON("$and" << BSONArray()))); + auto andExpression = dynamic_cast<ExpressionAnd*>(resultExpression.get()); + ASSERT_TRUE(andExpression); + ASSERT_EQ(andExpression->serialize(false), Value(Document{{"$and", vector<Value>{}}})); +} } // namespace Operand @@ -4016,8 +3067,7 @@ public: const BSONObj obj = BSON(field.first << args); VariablesIdGenerator idGenerator; VariablesParseState vps(&idGenerator); - const intrusive_ptr<Expression> expr = - Expression::parseExpression(obj.firstElement(), vps); + const intrusive_ptr<Expression> expr = Expression::parseExpression(obj, vps); Value result = expr->evaluate(Document()); if (result.getType() == Array) { result = sortSet(result); @@ -4045,7 +3095,7 @@ public: // NOTE: parse and evaluatation failures are treated the // same const intrusive_ptr<Expression> expr = - Expression::parseExpression(obj.firstElement(), vps); + Expression::parseExpression(obj, vps); expr->evaluate(Document()); }, UserException); @@ -4559,7 +3609,7 @@ TEST(ExpressionSubstrCPTest, DoesThrowWithBadContinuationByte) { const auto continuationByte = "\x80\x00"_sd; const auto expr = Expression::parseExpression( - BSON("$substrCP" << BSON_ARRAY(continuationByte << 0 << 1)).firstElement(), vps); + BSON("$substrCP" << BSON_ARRAY(continuationByte << 0 << 1)), vps); ASSERT_THROWS({ expr->evaluate(Document()); }, UserException); } @@ -4568,8 +3618,8 @@ TEST(ExpressionSubstrCPTest, DoesThrowWithInvalidLeadingByte) { VariablesParseState vps(&idGenerator); const auto leadingByte = "\xFF\x00"_sd; - const auto expr = Expression::parseExpression( - BSON("$substrCP" << BSON_ARRAY(leadingByte << 0 << 1)).firstElement(), vps); + const auto expr = + Expression::parseExpression(BSON("$substrCP" << BSON_ARRAY(leadingByte << 
0 << 1)), vps); ASSERT_THROWS({ expr->evaluate(Document()); }, UserException); } @@ -4826,8 +3876,7 @@ public: const BSONObj obj = BSON(field.first << args); VariablesIdGenerator idGenerator; VariablesParseState vps(&idGenerator); - const intrusive_ptr<Expression> expr = - Expression::parseExpression(obj.firstElement(), vps); + const intrusive_ptr<Expression> expr = Expression::parseExpression(obj, vps); const Value result = expr->evaluate(Document()); if (result != expected) { string errMsg = str::stream() @@ -4852,7 +3901,7 @@ public: // NOTE: parse and evaluation failures are treated the // same const intrusive_ptr<Expression> expr = - Expression::parseExpression(obj.firstElement(), vps); + Expression::parseExpression(obj, vps); expr->evaluate(Document()); }, UserException); @@ -5044,52 +4093,6 @@ public: add<FieldPath::AddToBsonObj>(); add<FieldPath::AddToBsonArray>(); - add<Object::Empty>(); - add<Object::Include>(); - add<Object::MissingInclude>(); - add<Object::IncludeId>(); - add<Object::ExcludeId>(); - add<Object::SourceOrder>(); - add<Object::IncludeNested>(); - add<Object::IncludeTwoNested>(); - add<Object::IncludeTwoParentNested>(); - add<Object::IncludeMissingNested>(); - add<Object::IncludeNestedWithinNonObject>(); - add<Object::IncludeArrayNested>(); - add<Object::ExcludeNonRootId>(); - add<Object::Computed>(); - add<Object::ComputedReplacement>(); - add<Object::ComputedUndefined>(); - add<Object::ComputedUndefinedReplacement>(); - add<Object::ComputedNull>(); - add<Object::ComputedNested>(); - add<Object::ComputedFieldPath>(); - add<Object::ComputedNestedFieldPath>(); - add<Object::EmptyNewSubobject>(); - add<Object::NonEmptyNewSubobject>(); - add<Object::AdjacentNestedComputedFields>(); - add<Object::AdjacentDottedAndNestedComputedFields>(); - add<Object::AdjacentNestedAndDottedComputedFields>(); - add<Object::AdjacentDottedComputedFields>(); - add<Object::AdjacentNestedOrdering>(); - add<Object::MultipleNestedFields>(); - 
add<Object::ConflictingExpressionFields>(); - add<Object::ConflictingInclusionExpressionFields>(); - add<Object::ConflictingExpressionInclusionFields>(); - add<Object::ConflictingObjectConstantExpressionFields>(); - add<Object::ConflictingConstantObjectExpressionFields>(); - add<Object::ConflictingNestedFields>(); - add<Object::ConflictingFieldAndSubfield>(); - add<Object::ConflictingFieldAndNestedField>(); - add<Object::ConflictingSubfieldAndField>(); - add<Object::ConflictingNestedFieldAndField>(); - add<Object::NonInclusionDependencies>(); - add<Object::InclusionDependencies>(); - add<Object::Optimize>(); - add<Object::AddToBsonObj>(); - add<Object::AddToBsonObjRequireExpression>(); - add<Object::AddToBsonArray>(); - add<Object::Evaluate>(); add<Or::NoOperands>(); add<Or::True>(); @@ -5116,49 +4119,6 @@ public: add<Or::Nested>(); add<Or::NestedOne>(); - add<Parse::Object::NonObject>(); - add<Parse::Object::Empty>(); - add<Parse::Object::Operator>(); - add<Parse::Object::InvalidOperator>(); - add<Parse::Object::TwoOperators>(); - add<Parse::Object::OperatorLaterField>(); - add<Parse::Object::OperatorAndOtherField>(); - add<Parse::Object::OperatorTopLevel>(); - add<Parse::Object::Dotted>(); - add<Parse::Object::DottedTopLevel>(); - add<Parse::Object::Nested>(); - add<Parse::Object::NestedParseError>(); - add<Parse::Object::FieldPath>(); - add<Parse::Object::InvalidFieldPath>(); - add<Parse::Object::NonFieldPathString>(); - add<Parse::Object::DisallowedInclusion>(); - add<Parse::Object::InclusionBool>(); - add<Parse::Object::InclusionDouble>(); - add<Parse::Object::InclusionInt>(); - add<Parse::Object::InclusionLong>(); - add<Parse::Object::NestedInclusion>(); - add<Parse::Object::ExcludeId>(); - add<Parse::Object::ExcludeNonId>(); - add<Parse::Object::ExcludeIdNotTopLevel>(); - add<Parse::Object::InvalidType>(); - add<Parse::Expression::Const>(); - add<Parse::Expression::InvalidName>(); - add<Parse::Expression::RequiredArrayMissing>(); - 
add<Parse::Expression::IncorrectOperandCount>(); - add<Parse::Expression::CorrectOperandCount>(); - add<Parse::Expression::ZeroOperands>(); - add<Parse::Expression::OneOperand>(); - add<Parse::Expression::TwoOperands>(); - add<Parse::Expression::SingletonOperandVariable>(); - add<Parse::Expression::SingletonOperandFixed>(); - add<Parse::Expression::ObjectSingleton>(); - add<Parse::Expression::ObjectOperand>(); - add<Parse::Operand::FieldPath>(); - add<Parse::Operand::NonFieldPathString>(); - add<Parse::Operand::Object>(); - add<Parse::Operand::InclusionObject>(); - add<Parse::Operand::Constant>(); - add<Strcasecmp::NullBegin>(); add<Strcasecmp::NullEnd>(); add<Strcasecmp::NullMiddleLt>(); diff --git a/src/mongo/db/pipeline/field_path.cpp b/src/mongo/db/pipeline/field_path.cpp index c2c934de41a..4fa7d699b52 100644 --- a/src/mongo/db/pipeline/field_path.cpp +++ b/src/mongo/db/pipeline/field_path.cpp @@ -30,6 +30,7 @@ #include "mongo/db/pipeline/field_path.h" +#include "mongo/base/string_data.h" #include "mongo/util/mongoutils/str.h" namespace mongo { @@ -43,76 +44,88 @@ using namespace mongoutils; const char FieldPath::prefix[] = "$"; -FieldPath::FieldPath(const vector<string>& fieldPath) { - massert(16409, "FieldPath cannot be constructed from an empty vector.", !fieldPath.empty()); - vFieldName.reserve(fieldPath.size()); - for (vector<string>::const_iterator i = fieldPath.begin(); i != fieldPath.end(); ++i) { - pushFieldName(*i); +std::string FieldPath::getFullyQualifiedPath(StringData prefix, StringData suffix) { + if (prefix.empty()) { + return suffix.toString(); + } + return str::stream() << prefix << "." 
<< suffix; +} + +FieldPath::FieldPath(const vector<string>& fieldNames) { + massert(16409, "FieldPath cannot be constructed from an empty vector.", !fieldNames.empty()); + _fieldNames.reserve(fieldNames.size()); + for (auto fieldName : fieldNames) { + pushFieldName(fieldName); } - verify(getPathLength() > 0); } FieldPath::FieldPath(const string& fieldPath) { - /* - The field path could be using dot notation. - Break the field path up by peeling off successive pieces. - */ + // Split 'fieldPath' at the dots. size_t startpos = 0; while (true) { - /* find the next dot */ + // Find the next dot. const size_t dotpos = fieldPath.find('.', startpos); - /* if there are no more dots, use the remainder of the string */ + // If there are no more dots, use the remainder of the string. if (dotpos == fieldPath.npos) { string lastFieldName = fieldPath.substr(startpos, dotpos); pushFieldName(lastFieldName); break; } - /* use the string up to the dot */ + // Use the string up to the dot. const size_t length = dotpos - startpos; string nextFieldName = fieldPath.substr(startpos, length); pushFieldName(nextFieldName); - /* next time, search starting one spot after that */ + // Start the next search after the dot. 
startpos = dotpos + 1; } verify(getPathLength() > 0); } -string FieldPath::getPath(bool fieldPrefix) const { +string FieldPath::fullPath() const { stringstream ss; - writePath(ss, fieldPrefix); + const bool includePrefix = false; + writePath(ss, includePrefix); return ss.str(); } -void FieldPath::writePath(ostream& outStream, bool fieldPrefix) const { - if (fieldPrefix) +string FieldPath::fullPathWithPrefix() const { + stringstream ss; + const bool includePrefix = true; + writePath(ss, includePrefix); + return ss.str(); +} + +void FieldPath::writePath(ostream& outStream, bool includePrefix) const { + if (includePrefix) outStream << prefix; - const size_t n = vFieldName.size(); + const size_t n = _fieldNames.size(); verify(n > 0); - outStream << vFieldName[0]; + outStream << _fieldNames[0]; for (size_t i = 1; i < n; ++i) - outStream << '.' << vFieldName[i]; + outStream << '.' << _fieldNames[i]; } FieldPath FieldPath::tail() const { - vector<string> allButFirst(vFieldName.begin() + 1, vFieldName.end()); + vector<string> allButFirst(_fieldNames.begin() + 1, _fieldNames.end()); return FieldPath(allButFirst); } -void FieldPath::uassertValidFieldName(const string& fieldName) { - uassert(15998, "FieldPath field names may not be empty strings.", fieldName.length() > 0); +void FieldPath::uassertValidFieldName(StringData fieldName) { + uassert(15998, "FieldPath field names may not be empty strings.", !fieldName.empty()); uassert(16410, "FieldPath field names may not start with '$'.", fieldName[0] != '$'); uassert( 16411, "FieldPath field names may not contain '\0'.", fieldName.find('\0') == string::npos); - uassert(16412, "FieldPath field names may not contain '.'.", !str::contains(fieldName, '.')); + uassert( + 16412, "FieldPath field names may not contain '.'.", fieldName.find('.') == string::npos); } void FieldPath::pushFieldName(const string& fieldName) { uassertValidFieldName(fieldName); - vFieldName.push_back(fieldName); + _fieldNames.push_back(fieldName); } } diff 
--git a/src/mongo/db/pipeline/field_path.h b/src/mongo/db/pipeline/field_path.h index 3f26891cd2b..1ac637839de 100644 --- a/src/mongo/db/pipeline/field_path.h +++ b/src/mongo/db/pipeline/field_path.h @@ -36,57 +36,72 @@ namespace mongo { +class StringData; + +/** + * Utility class which represents a field path with nested paths separated by dots. + */ class FieldPath { public: /** - * Constructor. - * - * @param fieldPath the dotted field path std::string or non empty pre-split vector. - * The constructed object will have getPathLength() > 0. - * Uassert if any component field names do not pass validation. + * Throws a UserException if a field name does not pass validation. */ - FieldPath(const std::string& fieldPath); - FieldPath(const std::vector<std::string>& fieldPath); + static void uassertValidFieldName(StringData fieldName); /** - Get the number of path elements in the field path. - - @returns the number of path elements + * Concatenates 'prefix' and 'suffix' using dotted path notation. 'prefix' is allowed to be + * empty. */ - size_t getPathLength() const; + static std::string getFullyQualifiedPath(StringData prefix, StringData suffix); /** - Get a particular path element from the path. - - @param i the zero based index of the path element. - @returns the path element + * Throws a UserException if the string is empty or if any of the field names fail validation. + * + * Field names are validated using uassertValidFieldName(). */ - const std::string& getFieldName(size_t i) const; + FieldPath(const std::string& fieldPath); /** - Get the full path. + * Throws a UserException if 'fieldNames' is empty or if any of the field names fail validation. + * + * Field names are validated using uassertValidFieldName(). + */ + FieldPath(const std::vector<std::string>& fieldNames); - @param fieldPrefix whether or not to include the field prefix - @returns the complete field path + /** + * Returns the number of path elements in the field path. 
*/ - std::string getPath(bool fieldPrefix) const; + size_t getPathLength() const { + return _fieldNames.size(); + } /** - Write the full path. + * Return the ith field name from this path using zero-based indexes. + */ + const std::string& getFieldName(size_t i) const { + dassert(i < getPathLength()); + return _fieldNames[i]; + } - @param outStream where to write the path to - @param fieldPrefix whether or not to include the field prefix - */ - void writePath(std::ostream& outStream, bool fieldPrefix) const; + /** + * Returns the full path, not including the prefix 'FieldPath::prefix'. + */ + std::string fullPath() const; /** - Get the prefix string. + * Returns the full path, including the prefix 'FieldPath::prefix'. + */ + std::string fullPathWithPrefix() const; - @returns the prefix string + /** + * Write the full path to 'outStream', including the prefix 'FieldPath::prefix' if + * 'includePrefix' is specified. */ - static const char* getPrefix(); + void writePath(std::ostream& outStream, bool includePrefix) const; - static const char prefix[]; + static const char* getPrefix() { + return prefix; + } /** * A FieldPath like this but missing the first element (useful for recursion). @@ -94,35 +109,17 @@ public: */ FieldPath tail() const; - /** Uassert if a field name does not pass validation. */ - static void uassertValidFieldName(const std::string& fieldName); - private: /** * Push a new field name to the back of the vector of names comprising the field path. - * Uassert if 'fieldName' does not pass validation. + * + * Throws a UserException if 'fieldName' does not pass validation done by + * uassertValidFieldName(). 
*/ void pushFieldName(const std::string& fieldName); - std::vector<std::string> vFieldName; -}; -} - - -/* ======================= INLINED IMPLEMENTATIONS ========================== */ - -namespace mongo { - -inline size_t FieldPath::getPathLength() const { - return vFieldName.size(); -} - -inline const std::string& FieldPath::getFieldName(size_t i) const { - dassert(i < getPathLength()); - return vFieldName[i]; -} + static const char prefix[]; -inline const char* FieldPath::getPrefix() { - return prefix; -} + std::vector<std::string> _fieldNames; +}; } diff --git a/src/mongo/db/pipeline/field_path_test.cpp b/src/mongo/db/pipeline/field_path_test.cpp index 92ba167f562..83df46410ae 100644 --- a/src/mongo/db/pipeline/field_path_test.cpp +++ b/src/mongo/db/pipeline/field_path_test.cpp @@ -61,8 +61,8 @@ public: FieldPath path("foo"); ASSERT_EQUALS(1U, path.getPathLength()); ASSERT_EQUALS("foo", path.getFieldName(0)); - ASSERT_EQUALS("foo", path.getPath(false)); - ASSERT_EQUALS("$foo", path.getPath(true)); + ASSERT_EQUALS("foo", path.fullPath()); + ASSERT_EQUALS("$foo", path.fullPathWithPrefix()); } }; @@ -74,7 +74,7 @@ public: FieldPath path(vec); ASSERT_EQUALS(1U, path.getPathLength()); ASSERT_EQUALS("foo", path.getFieldName(0)); - ASSERT_EQUALS("foo", path.getPath(false)); + ASSERT_EQUALS("foo", path.fullPath()); } }; @@ -102,8 +102,8 @@ public: ASSERT_EQUALS(2U, path.getPathLength()); ASSERT_EQUALS("foo", path.getFieldName(0)); ASSERT_EQUALS("bar", path.getFieldName(1)); - ASSERT_EQUALS("foo.bar", path.getPath(false)); - ASSERT_EQUALS("$foo.bar", path.getPath(true)); + ASSERT_EQUALS("foo.bar", path.fullPath()); + ASSERT_EQUALS("$foo.bar", path.fullPathWithPrefix()); } }; @@ -125,7 +125,7 @@ public: vec.push_back("bar"); FieldPath path(vec); ASSERT_EQUALS(2U, path.getPathLength()); - ASSERT_EQUALS("foo.bar", path.getPath(false)); + ASSERT_EQUALS("foo.bar", path.fullPath()); } }; @@ -146,7 +146,7 @@ public: ASSERT_EQUALS("foo", path.getFieldName(0)); 
ASSERT_EQUALS("bar", path.getFieldName(1)); ASSERT_EQUALS("baz", path.getFieldName(2)); - ASSERT_EQUALS("foo.bar.baz", path.getPath(false)); + ASSERT_EQUALS("foo.bar.baz", path.fullPath()); } }; @@ -180,7 +180,7 @@ public: void run() { FieldPath path("foo.a.bar"); ASSERT_EQUALS(3U, path.getPathLength()); - ASSERT_EQUALS("foo.a.bar", path.getPath(false)); + ASSERT_EQUALS("foo.a.bar", path.fullPath()); } }; @@ -209,7 +209,7 @@ public: void run() { FieldPath path = FieldPath("foo.bar").tail(); ASSERT_EQUALS(1U, path.getPathLength()); - ASSERT_EQUALS("bar", path.getPath(false)); + ASSERT_EQUALS("bar", path.fullPath()); } }; @@ -219,7 +219,7 @@ public: void run() { FieldPath path = FieldPath("foo.bar.baz").tail(); ASSERT_EQUALS(2U, path.getPathLength()); - ASSERT_EQUALS("bar.baz", path.getPath(false)); + ASSERT_EQUALS("bar.baz", path.fullPath()); } }; diff --git a/src/mongo/db/pipeline/parsed_aggregation_projection.cpp b/src/mongo/db/pipeline/parsed_aggregation_projection.cpp new file mode 100644 index 00000000000..c43d626f2b3 --- /dev/null +++ b/src/mongo/db/pipeline/parsed_aggregation_projection.cpp @@ -0,0 +1,231 @@ +/** + * Copyright (C) 2016 MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include <boost/optional.hpp> +#include <string> +#include <unordered_set> + +#include "mongo/db/pipeline/parsed_aggregation_projection.h" + +#include "mongo/bson/bsonelement.h" +#include "mongo/bson/bsonobj.h" +#include "mongo/db/matcher/expression_algo.h" +#include "mongo/db/pipeline/field_path.h" +#include "mongo/db/pipeline/parsed_exclusion_projection.h" +#include "mongo/db/pipeline/parsed_inclusion_projection.h" +#include "mongo/util/assert_util.h" +#include "mongo/util/mongoutils/str.h" + +namespace mongo { +namespace parsed_aggregation_projection { + +namespace { + +/** + * This class is responsible for determining if the provided specification is valid, and determining + * whether it specifies an inclusion projection or an exclusion projection. + */ +class ProjectSpecTypeParser { +public: + /** + * Parses 'spec' to determine whether it is an inclusion or exclusion projection. + * + * Throws a UserException if the specification is invalid. 
+ */ + static ProjectionType parse(const BSONObj& spec) { + ProjectSpecTypeParser parser(spec); + parser.parse(); + invariant(parser._parsedType); + return *(parser._parsedType); + } + +private: + ProjectSpecTypeParser(const BSONObj& spec) : _rawObj(spec) {} + + /** + * Uses '_seenPaths' to see if 'path' conflicts with any paths that have already been specified. + * + * For example, a user is not allowed to specify {'a': 1, 'a.b': 1}, or some similar conflicting + * paths. + */ + void ensurePathDoesNotConflictOrThrow(StringData path) { + for (auto&& seenPath : _seenPaths) { + uassert(40176, + str::stream() << "$project specification contains two conflicting paths. " + "Cannot specify both '" + << path + << "' and '" + << seenPath + << "': " + << _rawObj.toString(), + path != seenPath && !expression::isPathPrefixOf(path, seenPath) && + !expression::isPathPrefixOf(seenPath, path)); + } + } + + /** + * Traverses '_rawObj' to determine the type of projection, populating '_parsedType' in the + * process. + * + * Throws a UserException if an invalid projection specification is detected. + */ + void parse() { + uassert(40177, "$project specification must have at least one field", !_rawObj.isEmpty()); + + for (auto&& elem : _rawObj) { + parseElement(elem, elem.fieldName()); + } + + // Default to inclusion if nothing (except maybe '_id') is explicitly included or excluded. + if (!_parsedType) { + _parsedType = ProjectionType::kInclusion; + } + } + + /** + * Parses a single BSONElement. 'pathToElem' should include the field name of 'elem'. + * + * Delegates to parseNestedObject() if 'elem' is an object. Otherwise adds the full path to 'elem' + * to '_seenPaths', and updates '_parsedType' if appropriate. + * + * Throws a UserException if the path to 'elem' conflicts with a path that has already been + * specified, or if this element represents a mix of projection types. 
+ */ + void parseElement(const BSONElement& elem, StringData pathToElem) { + if (elem.type() == BSONType::Object) { + return parseNestedObject(elem.Obj(), pathToElem); + } + + if ((elem.isBoolean() || elem.isNumber()) && !elem.trueValue()) { + // A top-level exclusion of "_id" is allowed in either an inclusion projection or an + // exclusion projection, so doesn't affect '_parsedType'. + if (pathToElem != "_id") { + uassert(40178, + str::stream() << "Bad projection specification, cannot exclude fields " + "other than '_id' in an inclusion projection: " + << _rawObj.toString(), + !_parsedType || (*_parsedType == ProjectionType::kExclusion)); + _parsedType = ProjectionType::kExclusion; + } + } else { + // A boolean true, a truthy numeric value, or any expression can only be used with an + // inclusion projection. Note that literal values like "string" or null are also treated + // as expressions. + uassert(40179, + str::stream() << "Bad projection specification, cannot include fields or " + "add computed fields during an exclusion projection: " + << _rawObj.toString(), + !_parsedType || (*_parsedType == ProjectionType::kInclusion)); + _parsedType = ProjectionType::kInclusion; + } + ensurePathDoesNotConflictOrThrow(pathToElem); + _seenPaths.insert(pathToElem.toString()); + } + + /** + * Traverses 'thisLevelSpec', parsing each element in turn. + * + * Throws a UserException if any paths conflict with each other or existing paths, + * 'thisLevelSpec' contains a dotted path, or if 'thisLevelSpec' represents an invalid + * expression. + */ + void parseNestedObject(const BSONObj& thisLevelSpec, StringData prefix) { + uassert(40180, + str::stream() << "an empty object is not a valid value in a $project. Found " + "empty object at path " + << prefix, + !thisLevelSpec.isEmpty()); + + for (auto elem : thisLevelSpec) { + auto fieldName = elem.fieldNameStringData(); + if (fieldName[0] == '$') { + // This object is an expression specification like {$add: [...]}. 
It will be parsed + // into an Expression later, but for now, just track that the prefix has been + // specified and skip it. + uassert(40181, + str::stream() + << "Bad projection specification: An expression specification must " + "contain exactly one field, the name of the expression. Found " + << thisLevelSpec.nFields() + << " fields in " + << thisLevelSpec.toString() + << ", while parsing $project object " + << _rawObj.toString(), + thisLevelSpec.nFields() == 1); + uassert(40182, + str::stream() << "Bad projection specification, cannot include fields or " + "add computed fields during an exclusion projection: " + << _rawObj.toString(), + !_parsedType || _parsedType == ProjectionType::kInclusion); + ensurePathDoesNotConflictOrThrow(prefix); + _seenPaths.insert(prefix.toString()); + continue; + } + + uassert(40183, + str::stream() << "cannot use dotted field name '" << fieldName + << "' in a sub object of a $project stage: " + << _rawObj.toString(), + fieldName.find('.') == std::string::npos); + + parseElement(elem, FieldPath::getFullyQualifiedPath(prefix, fieldName)); + } + } + + // The original object. Used to generate more helpful error messages. + const BSONObj& _rawObj; + + // This will be populated during parse(). + boost::optional<ProjectionType> _parsedType; + + // Tracks which paths we've seen to ensure no two paths conflict with each other. + std::unordered_set<std::string> _seenPaths; +}; + +} // namespace + +std::unique_ptr<ParsedAggregationProjection> ParsedAggregationProjection::create( + const BSONObj& spec) { + // Check for any conflicting specifications, and determine the type of the projection. + auto projectionType = ProjectSpecTypeParser::parse(spec); + + // We can't use make_unique() here, since the branches have different types. + std::unique_ptr<ParsedAggregationProjection> parsedProject( + projectionType == ProjectionType::kInclusion + ? 
static_cast<ParsedAggregationProjection*>(new ParsedInclusionProjection()) + : static_cast<ParsedAggregationProjection*>(new ParsedExclusionProjection())); + + // Actually parse the specification. + parsedProject->parse(spec); + return parsedProject; +} + +} // namespace parsed_aggregation_projection +} // namespace mongo diff --git a/src/mongo/db/pipeline/parsed_aggregation_projection.h b/src/mongo/db/pipeline/parsed_aggregation_projection.h new file mode 100644 index 00000000000..9149ff7a53c --- /dev/null +++ b/src/mongo/db/pipeline/parsed_aggregation_projection.h @@ -0,0 +1,96 @@ +/** + * Copyright (C) 2016 MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. 
If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include <memory> + +namespace mongo { + +class BSONObj; +struct DepsTracker; +class Document; + +namespace parsed_aggregation_projection { + +enum class ProjectionType { kExclusion, kInclusion }; + +/** + * A ParsedAggregationProjection is responsible for parsing and executing a projection. It + * represents either an inclusion or exclusion projection. This is the common interface between the + * two types of projections. + */ +class ParsedAggregationProjection { +public: + /** + * Main entry point for a ParsedAggregationProjection. + * + * Throws a UserException if 'spec' is an invalid projection specification. + */ + static std::unique_ptr<ParsedAggregationProjection> create(const BSONObj& spec); + + virtual ~ParsedAggregationProjection() = default; + + /** + * Returns the type of projection represented by this ParsedAggregationProjection. + */ + virtual ProjectionType getType() const = 0; + + /** + * Parse the user-specified BSON object 'spec'. By the time this is called, 'spec' has already + * been verified to not have any conflicting path specifications, and not to mix and match + * inclusions and exclusions. 'variablesParseState' is used by any contained expressions to + * track which variables are defined so that they can later be referenced at execution time. + */ + virtual void parse(const BSONObj& spec) = 0; + + /** + * Serialize this projection. + */ + virtual Document serialize(bool explain = false) const = 0; + + /** + * Optimize any expressions contained within this projection. + */ + virtual void optimize() {} + + /** + * Add any dependencies needed by this projection or any sub-expressions to 'deps'. + */ + virtual void addDependencies(DepsTracker* deps) const {} + + /** + * Apply the projection to 'input'. 
+ */ + virtual Document applyProjection(Document input) const = 0; + +protected: + ParsedAggregationProjection() = default; +}; +} // namespace parsed_aggregation_projection +} // namespace mongo diff --git a/src/mongo/db/pipeline/parsed_aggregation_projection_test.cpp b/src/mongo/db/pipeline/parsed_aggregation_projection_test.cpp new file mode 100644 index 00000000000..fa3002768d7 --- /dev/null +++ b/src/mongo/db/pipeline/parsed_aggregation_projection_test.cpp @@ -0,0 +1,395 @@ +/** + * Copyright (C) 2016 MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/pipeline/parsed_aggregation_projection.h" + +#include <vector> + +#include "mongo/bson/bsonmisc.h" +#include "mongo/bson/bsonobjbuilder.h" +#include "mongo/bson/json.h" +#include "mongo/db/pipeline/document.h" +#include "mongo/db/pipeline/value.h" +#include "mongo/unittest/unittest.h" + +namespace mongo { +namespace parsed_aggregation_projection { +namespace { + +template <typename T> +BSONObj wrapInLiteral(const T& arg) { + return BSON("$literal" << arg); +} + +// +// Error cases. +// + +TEST(ParsedAggregationProjectionErrors, ShouldRejectDuplicateFieldNames) { + // Include/exclude the same field twice. + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("a" << true << "a" << true)), + UserException); + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("a" << false << "a" << false)), + UserException); + ASSERT_THROWS( + ParsedAggregationProjection::create(BSON("a" << BSON("b" << false << "b" << false))), + UserException); + + // Mix of include/exclude and adding a field. + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("a" << wrapInLiteral(1) << "a" << true)), + UserException); + ASSERT_THROWS( + ParsedAggregationProjection::create(BSON("a" << false << "a" << wrapInLiteral(0))), + UserException); + + // Adding the same field twice. + ASSERT_THROWS(ParsedAggregationProjection::create( + BSON("a" << wrapInLiteral(1) << "a" << wrapInLiteral(0))), + UserException); +} + +TEST(ParsedAggregationProjectionErrors, ShouldRejectDuplicateIds) { + // Include/exclude _id twice. + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("_id" << true << "_id" << true)), + UserException); + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("_id" << false << "_id" << false)), + UserException); + + // Mix of including/excluding and adding _id. 
+ ASSERT_THROWS( + ParsedAggregationProjection::create(BSON("_id" << wrapInLiteral(1) << "_id" << true)), + UserException); + ASSERT_THROWS( + ParsedAggregationProjection::create(BSON("_id" << false << "_id" << wrapInLiteral(0))), + UserException); + + // Adding _id twice. + ASSERT_THROWS(ParsedAggregationProjection::create( + BSON("_id" << wrapInLiteral(1) << "_id" << wrapInLiteral(0))), + UserException); +} + +TEST(ParsedAggregationProjectionErrors, ShouldRejectFieldsWithSharedPrefix) { + // Include/exclude fields with a shared prefix. + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("a" << true << "a.b" << true)), + UserException); + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("a.b" << false << "a" << false)), + UserException); + + // Mix of include/exclude and adding a shared prefix. + ASSERT_THROWS( + ParsedAggregationProjection::create(BSON("a" << wrapInLiteral(1) << "a.b" << true)), + UserException); + ASSERT_THROWS( + ParsedAggregationProjection::create(BSON("a.b" << false << "a" << wrapInLiteral(0))), + UserException); + + // Adding a shared prefix twice. + ASSERT_THROWS(ParsedAggregationProjection::create( + BSON("a" << wrapInLiteral(1) << "a.b" << wrapInLiteral(0))), + UserException); + ASSERT_THROWS(ParsedAggregationProjection::create( + BSON("a.b.c.d" << wrapInLiteral(1) << "a.b.c" << wrapInLiteral(0))), + UserException); +} + +TEST(ParsedAggregationProjectionErrors, ShouldRejectMixOfIdAndSubFieldsOfId) { + // Include/exclude both _id and a sub-field of _id. + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("_id" << true << "_id.x" << true)), + UserException); + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("_id.x" << false << "_id" << false)), + UserException); + + // Mix of including/excluding and adding _id and a sub-field of _id. 
+ ASSERT_THROWS( + ParsedAggregationProjection::create(BSON("_id" << wrapInLiteral(1) << "_id.x" << true)), + UserException); + ASSERT_THROWS( + ParsedAggregationProjection::create(BSON("_id.x" << false << "_id" << wrapInLiteral(0))), + UserException); + + // Adding _id twice. + ASSERT_THROWS(ParsedAggregationProjection::create( + BSON("_id" << wrapInLiteral(1) << "_id.x" << wrapInLiteral(0))), + UserException); + ASSERT_THROWS(ParsedAggregationProjection::create( + BSON("_id.b.c.d" << wrapInLiteral(1) << "_id.b.c" << wrapInLiteral(0))), + UserException); +} + +TEST(ParsedAggregationProjectionErrors, ShouldRejectMixOfInclusionAndExclusion) { + // Simple mix. + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("a" << true << "b" << false)), + UserException); + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("a" << false << "b" << true)), + UserException); + ASSERT_THROWS( + ParsedAggregationProjection::create(BSON("a" << BSON("b" << false << "c" << true))), + UserException); + ASSERT_THROWS( + ParsedAggregationProjection::create(BSON("_id" << BSON("b" << false << "c" << true))), + UserException); + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("_id.b" << false << "a.c" << true)), + UserException); + + // Mix while also adding a field. + ASSERT_THROWS(ParsedAggregationProjection::create( + BSON("a" << true << "b" << wrapInLiteral(1) << "c" << false)), + UserException); + ASSERT_THROWS(ParsedAggregationProjection::create( + BSON("a" << false << "b" << wrapInLiteral(1) << "c" << true)), + UserException); + + // Mixing "_id" inclusion with exclusion. 
+ ASSERT_THROWS(ParsedAggregationProjection::create(BSON("_id" << true << "a" << false)), + UserException); + + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("a" << false << "_id" << true)), + UserException); + + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("_id" << true << "a.b.c" << false)), + UserException); + + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("_id.x" << true << "a.b.c" << false)), + UserException); +} + +TEST(ParsedAggregationProjectionType, ShouldRejectMixOfExclusionAndComputedFields) { + ASSERT_THROWS( + ParsedAggregationProjection::create(BSON("a" << false << "b" << wrapInLiteral(1))), + UserException); + + ASSERT_THROWS( + ParsedAggregationProjection::create(BSON("a.b" << false << "a.c" << wrapInLiteral(1))), + UserException); + + ASSERT_THROWS(ParsedAggregationProjection::create( + BSON("a" << BSON("b" << false << "c" << wrapInLiteral(1)))), + UserException); +} + +TEST(ParsedAggregationProjectionErrors, ShouldRejectDottedFieldInSubDocument) { + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("a" << BSON("b.c" << true))), + UserException); + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("a" << BSON("b.c" << wrapInLiteral(1)))), + UserException); +} + +TEST(ParsedAggregationProjectionErrors, ShouldRejectTopLevelExpression) { + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("$add" << BSON_ARRAY(4 << 2))), + UserException); +} + +TEST(ParsedAggregationProjectionErrors, ShouldRejectExpressionWithMultipleFieldNames) { + ASSERT_THROWS(ParsedAggregationProjection::create( + BSON("a" << BSON("$add" << BSON_ARRAY(4 << 2) << "b" << 1))), + UserException); + ASSERT_THROWS(ParsedAggregationProjection::create( + BSON("a" << BSON("b" << 1 << "$add" << BSON_ARRAY(4 << 2)))), + UserException); + ASSERT_THROWS(ParsedAggregationProjection::create( + BSON("a" << BSON("b" << BSON("c" << 1 << "$add" << BSON_ARRAY(4 << 2))))), + UserException); + ASSERT_THROWS(ParsedAggregationProjection::create( + BSON("a" << 
BSON("b" << BSON("$add" << BSON_ARRAY(4 << 2) << "c" << 1)))), + UserException); +} + +TEST(ParsedAggregationProjectionErrors, ShouldRejectEmptyProjection) { + ASSERT_THROWS(ParsedAggregationProjection::create(BSONObj()), UserException); +} + +TEST(ParsedAggregationProjectionErrors, ShouldRejectEmptyNestedObject) { + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("a" << BSONObj())), UserException); + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("a" << false << "b" << BSONObj())), + UserException); + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("a" << true << "b" << BSONObj())), + UserException); + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("a.b" << BSONObj())), UserException); + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("a" << BSON("b" << BSONObj()))), + UserException); +} + +TEST(ParsedAggregationProjectionErrors, ShouldErrorOnInvalidExpression) { + ASSERT_THROWS(ParsedAggregationProjection::create( + BSON("a" << false << "b" << BSON("$unknown" << BSON_ARRAY(4 << 2)))), + UserException); + ASSERT_THROWS(ParsedAggregationProjection::create( + BSON("a" << true << "b" << BSON("$unknown" << BSON_ARRAY(4 << 2)))), + UserException); +} + +TEST(ParsedAggregationProjectionErrors, ShouldErrorOnInvalidFieldPath) { + // Empty field names. 
+ ASSERT_THROWS(ParsedAggregationProjection::create(BSON("" << wrapInLiteral(2))), UserException); + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("" << true)), UserException); + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("" << false)), UserException); + + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("a" << BSON("" << true))), + UserException); + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("a" << BSON("" << false))), + UserException); + + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("" << BSON("a" << true))), + UserException); + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("" << BSON("a" << false))), + UserException); + + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("a." << true)), UserException); + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("a." << false)), UserException); + + ASSERT_THROWS(ParsedAggregationProjection::create(BSON(".a" << true)), UserException); + ASSERT_THROWS(ParsedAggregationProjection::create(BSON(".a" << false)), UserException); + + // Not testing field names with null bytes, since that is invalid BSON, and won't make it to the + // $project stage without a previous error. + + // Field names starting with '$'. + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("$x" << wrapInLiteral(2))), + UserException); + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("c.$d" << true)), UserException); + ASSERT_THROWS(ParsedAggregationProjection::create(BSON("c.$d" << false)), UserException); +} + +TEST(ParsedAggregationProjectionErrors, ShouldErrorOnProjectionWithNoOutputFields) { + // This is treated as an inclusion projection without any fields, so should error. 
+ ASSERT_THROWS(ParsedAggregationProjection::create(BSON("_id" << false)), UserException); +} + +TEST(ParsedAggregationProjectionErrors, ShouldNotErrorOnTwoNestedFields) { + ParsedAggregationProjection::create(BSON("a.b" << true << "a.c" << true)); + ParsedAggregationProjection::create(BSON("a.b" << true << "a" << BSON("c" << true))); +} + +// +// Determining exclusion vs. inclusion. +// + +TEST(ParsedAggregationProjectionType, ShouldDefaultToInclusionProjection) { + auto parsedProject = ParsedAggregationProjection::create(BSON("_id" << true)); + ASSERT(parsedProject->getType() == ProjectionType::kInclusion); + + parsedProject = ParsedAggregationProjection::create(BSON("_id" << wrapInLiteral(1))); + ASSERT(parsedProject->getType() == ProjectionType::kInclusion); + + parsedProject = ParsedAggregationProjection::create(BSON("a" << wrapInLiteral(1))); + ASSERT(parsedProject->getType() == ProjectionType::kInclusion); +} + +TEST(ParsedAggregationProjectionType, ShouldDetectExclusionProjection) { + auto parsedProject = ParsedAggregationProjection::create(BSON("a" << false)); + ASSERT(parsedProject->getType() == ProjectionType::kExclusion); + + parsedProject = ParsedAggregationProjection::create(BSON("_id.x" << false)); + ASSERT(parsedProject->getType() == ProjectionType::kExclusion); + + parsedProject = ParsedAggregationProjection::create(BSON("_id" << BSON("x" << false))); + ASSERT(parsedProject->getType() == ProjectionType::kExclusion); + + parsedProject = ParsedAggregationProjection::create(BSON("x" << BSON("_id" << false))); + ASSERT(parsedProject->getType() == ProjectionType::kExclusion); +} + +TEST(ParsedAggregationProjectionType, ShouldDetectInclusionProjection) { + auto parsedProject = ParsedAggregationProjection::create(BSON("a" << true)); + ASSERT(parsedProject->getType() == ProjectionType::kInclusion); + + parsedProject = ParsedAggregationProjection::create(BSON("_id" << false << "a" << true)); + ASSERT(parsedProject->getType() == ProjectionType::kInclusion); 
+ + parsedProject = ParsedAggregationProjection::create(BSON("_id" << false << "a.b.c" << true)); + ASSERT(parsedProject->getType() == ProjectionType::kInclusion); + + parsedProject = ParsedAggregationProjection::create(BSON("_id.x" << true)); + ASSERT(parsedProject->getType() == ProjectionType::kInclusion); + + parsedProject = ParsedAggregationProjection::create(BSON("_id" << BSON("x" << true))); + ASSERT(parsedProject->getType() == ProjectionType::kInclusion); + + parsedProject = ParsedAggregationProjection::create(BSON("x" << BSON("_id" << true))); + ASSERT(parsedProject->getType() == ProjectionType::kInclusion); +} + +TEST(ParsedAggregationProjectionType, ShouldTreatOnlyComputedFieldsAsAnInclusionProjection) { + auto parsedProject = ParsedAggregationProjection::create(BSON("a" << wrapInLiteral(1))); + ASSERT(parsedProject->getType() == ProjectionType::kInclusion); + + parsedProject = + ParsedAggregationProjection::create(BSON("_id" << false << "a" << wrapInLiteral(1))); + ASSERT(parsedProject->getType() == ProjectionType::kInclusion); + + parsedProject = + ParsedAggregationProjection::create(BSON("_id" << false << "a.b.c" << wrapInLiteral(1))); + ASSERT(parsedProject->getType() == ProjectionType::kInclusion); + + parsedProject = ParsedAggregationProjection::create(BSON("_id.x" << wrapInLiteral(1))); + ASSERT(parsedProject->getType() == ProjectionType::kInclusion); + + parsedProject = + ParsedAggregationProjection::create(BSON("_id" << BSON("x" << wrapInLiteral(1)))); + ASSERT(parsedProject->getType() == ProjectionType::kInclusion); + + parsedProject = + ParsedAggregationProjection::create(BSON("x" << BSON("_id" << wrapInLiteral(1)))); + ASSERT(parsedProject->getType() == ProjectionType::kInclusion); +} + +TEST(ParsedAggregationProjectionType, ShouldAllowMixOfInclusionAndComputedFields) { + auto parsedProject = + ParsedAggregationProjection::create(BSON("a" << true << "b" << wrapInLiteral(1))); + ASSERT(parsedProject->getType() == ProjectionType::kInclusion); + 
+ parsedProject = + ParsedAggregationProjection::create(BSON("a.b" << true << "a.c" << wrapInLiteral(1))); + ASSERT(parsedProject->getType() == ProjectionType::kInclusion); + + parsedProject = ParsedAggregationProjection::create( + BSON("a" << BSON("b" << true << "c" << wrapInLiteral(1)))); + ASSERT(parsedProject->getType() == ProjectionType::kInclusion); +} + +TEST(ParsedAggregationProjectionType, ShouldCoerceNumericsToBools) { + std::vector<Value> zeros = {Value(0), Value(0LL), Value(0.0), Value(Decimal128(0))}; + for (auto&& zero : zeros) { + auto parsedProject = ParsedAggregationProjection::create(Document{{"a", zero}}.toBson()); + ASSERT(parsedProject->getType() == ProjectionType::kExclusion); + } + + std::vector<Value> nonZeroes = { + Value(1), Value(-1), Value(3), Value(1LL), Value(1.0), Value(Decimal128(1))}; + for (auto&& nonZero : nonZeroes) { + auto parsedProject = ParsedAggregationProjection::create(Document{{"a", nonZero}}.toBson()); + ASSERT(parsedProject->getType() == ProjectionType::kInclusion); + } +} + +} // namespace +} // namespace parsed_aggregation_projection +} // namespace mongo diff --git a/src/mongo/db/pipeline/parsed_exclusion_projection.cpp b/src/mongo/db/pipeline/parsed_exclusion_projection.cpp new file mode 100644 index 00000000000..198a85e7a69 --- /dev/null +++ b/src/mongo/db/pipeline/parsed_exclusion_projection.cpp @@ -0,0 +1,190 @@ +/** + * Copyright (C) 2016 MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. 
+ * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/pipeline/parsed_exclusion_projection.h" + +#include "mongo/db/pipeline/document.h" +#include "mongo/db/pipeline/field_path.h" +#include "mongo/db/pipeline/value.h" +#include "mongo/stdx/memory.h" + +namespace mongo { + +namespace parsed_aggregation_projection { + +// +// ExclusionNode. 
+//
+
+// Builds a node and records the dotted path that leads to it from the root.
+ExclusionNode::ExclusionNode(std::string pathToNode) : _pathToNode(std::move(pathToNode)) {}
+
+// Renders this node as a document: each field excluded at this level maps to 'false', and each
+// child contributes a nested document produced by its own serialize().
+Document ExclusionNode::serialize() const {
+    MutableDocument serialized;
+
+    for (auto&& fieldName : _excludedFields) {
+        serialized.addField(fieldName, Value(false));
+    }
+
+    for (auto&& entry : _children) {
+        serialized.addField(entry.first, Value(entry.second->serialize()));
+    }
+
+    return serialized.freeze();
+}
+
+// Records 'path' as excluded. A multi-component path is peeled one component at a time, creating
+// child nodes as needed, until the final component is stored on the deepest node.
+void ExclusionNode::excludePath(FieldPath path) {
+    if (path.getPathLength() != 1) {
+        addOrGetChild(path.getFieldName(0))->excludePath(path.tail());
+        return;
+    }
+    _excludedFields.insert(path.fullPath());
+}
+
+// Returns a copy of 'input' with this level's excluded fields removed and with every child
+// node's exclusions applied to the value stored under that child's field name.
+Document ExclusionNode::applyProjection(Document input) const {
+    MutableDocument projected(input);
+
+    for (auto&& fieldName : _excludedFields) {
+        projected.remove(fieldName);
+    }
+
+    for (auto&& entry : _children) {
+        projected[entry.first] = entry.second->applyProjectionToValue(input[entry.first]);
+    }
+
+    return projected.freeze();
+}
+
+// Returns the child for the single-component 'fieldPath', creating it on first use.
+ExclusionNode* ExclusionNode::addOrGetChild(FieldPath fieldPath) {
+    invariant(fieldPath.getPathLength() == 1);
+
+    if (auto existing = getChild(fieldPath.fullPath())) {
+        return existing;
+    }
+    return addChild(fieldPath.fullPath());
+}
+
+// Returns the child registered under 'field', or nullptr when there is none.
+ExclusionNode* ExclusionNode::getChild(std::string field) const {
+    auto found = _children.find(field);
+    if (found == _children.end()) {
+        return nullptr;
+    }
+    return found->second.get();
+}
+
+// Creates and registers a new child under 'field'. A child must not already exist for 'field'.
+ExclusionNode* ExclusionNode::addChild(std::string field) {
+    // Compute the child's path before 'field' is moved into the map below.
+    auto pathToChild = _pathToNode.empty() ? field : _pathToNode + "." + field;
+
+    auto childNode = stdx::make_unique<ExclusionNode>(pathToChild);
+    auto insertion = _children.emplace(std::make_pair(std::move(field), std::move(childNode)));
+
+    // 'insertion' is a pair<iterator position, bool inserted>; the insert must have happened.
+    invariant(insertion.second);
+
+    return insertion.first->second.get();
+}
+
+// Applies this node's exclusions to an arbitrary value: documents are projected, arrays are
+// projected element by element, and every other type is returned untouched.
+Value ExclusionNode::applyProjectionToValue(Value val) const {
+    const auto type = val.getType();
+
+    if (type == BSONType::Object) {
+        return Value(applyProjection(val.getDocument()));
+    }
+
+    if (type != BSONType::Array) {
+        return val;
+    }
+
+    // Apply exclusion to each element of the array. Note that numeric paths aren't treated
+    // specially, and we will always apply the projection to each element in the array.
+    //
+    // For example, applying the projection {"a.1": 0} to the document
+    // {a: [{b: 0, "1": 0}, {b: 1, "1": 1}]} will not result in {a: [{b: 0, "1": 0}]}, but
+    // instead will result in {a: [{b: 0}, {b: 1}]}.
+    std::vector<Value> elements = val.getArray();
+    for (auto& element : elements) {
+        element = applyProjectionToValue(element);
+    }
+    return Value(std::move(elements));
+}
+
+//
+// ParsedExclusionProjection.
+//
+
+// Serialization is delegated to the root of the exclusion tree; 'explain' is not consulted.
+Document ParsedExclusionProjection::serialize(bool explain) const {
+    return _root->serialize();
+}
+
+// Execution is delegated to the root of the exclusion tree.
+Document ParsedExclusionProjection::applyProjection(Document inputDoc) const {
+    return _root->applyProjection(inputDoc);
+}
+
+// Walks one level of 'spec', recording excluded fields on 'node' and recursing into nested
+// specification objects. 'depth' is 0 for the top-level specification.
+void ParsedExclusionProjection::parse(const BSONObj& spec, ExclusionNode* node, size_t depth) {
+    for (auto elem : spec) {
+        const auto fieldName = elem.fieldNameStringData().toString();
+
+        // A $ should have been detected in ParsedAggregationProjection's parsing before we get
+        // here.
+        invariant(fieldName[0] != '$');
+
+        switch (elem.type()) {
+            case BSONType::Bool:
+            case BSONType::NumberInt:
+            case BSONType::NumberLong:
+            case BSONType::NumberDouble:
+            case BSONType::NumberDecimal: {
+                // We have already verified this is an exclusion projection.
+                invariant(!elem.trueValue());
+
+                node->excludePath(FieldPath(fieldName));
+                break;
+            }
+            case BSONType::Object: {
+                // This object represents a nested projection specification, like the sub-object in
+                // {a: {b: 0, c: 0}} or {"a.b": {c: 0}}.
+                const bool isDotted = elem.fieldNameStringData().find('.') != std::string::npos;
+                ExclusionNode* target = nullptr;
+
+                if (isDotted) {
+                    // A dotted field is not allowed in a sub-object, and should have been detected
+                    // in ParsedAggregationProjection's parsing before we get here.
+                    invariant(depth == 0);
+
+                    // Descend one path component at a time, creating children along the way,
+                    // until only the last component of the dotted path remains.
+                    target = node;
+                    auto remaining = FieldPath(fieldName);
+                    for (; remaining.getPathLength() > 1; remaining = remaining.tail()) {
+                        target = target->addOrGetChild(remaining.getFieldName(0));
+                    }
+
+                    // It is illegal to construct an empty FieldPath, so the loop above stops one
+                    // component early. Add the last component here.
+                    target = target->addOrGetChild(remaining.fullPath());
+                } else {
+                    target = node->addOrGetChild(fieldName);
+                }
+
+                parse(elem.Obj(), target, depth + 1);
+                break;
+            }
+            default: { MONGO_UNREACHABLE; }
+        }
+    }
+}
+
+} // namespace parsed_aggregation_projection
+} // namespace mongo
diff --git a/src/mongo/db/pipeline/parsed_exclusion_projection.h b/src/mongo/db/pipeline/parsed_exclusion_projection.h
new file mode 100644
index 00000000000..540632f3867
--- /dev/null
+++ b/src/mongo/db/pipeline/parsed_exclusion_projection.h
@@ -0,0 +1,134 @@
+/**
+ * Copyright (C) 2016 MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+
+#include "mongo/db/pipeline/parsed_aggregation_projection.h"
+
+namespace mongo {
+
+class FieldPath;
+class Value;
+
+namespace parsed_aggregation_projection {
+
+/**
+ * A node used to define the parsed structure of an exclusion projection. Each ExclusionNode
+ * represents one 'level' of the parsed specification. The root ExclusionNode represents all top
+ * level exclusions, with any child ExclusionNodes representing dotted or nested exclusions.
+ */
+class ExclusionNode {
+public:
+    /**
+     * Creates a node reached via the dotted path 'pathToNode'; the root uses the empty string.
+     *
+     * The constructor is explicit so that a std::string is never silently converted into a node.
+     */
+    explicit ExclusionNode(std::string pathToNode = "");
+
+    /**
+     * Serialize this exclusion. Each field excluded at this level is emitted with the value
+     * 'false', and each child node is emitted as a nested document.
+     */
+    Document serialize() const;
+
+    /**
+     * Mark this path to be excluded. 'path' is allowed to be dotted, in which case child nodes are
+     * created as needed for every component except the last.
+     */
+    void excludePath(FieldPath path);
+
+    /**
+     * Applies this tree of exclusions to the input document and returns the resulting document.
+     */
+    Document applyProjection(Document input) const;
+
+    /**
+     * Creates the child if it doesn't already exist. 'field' is not allowed to be dotted.
+     *
+     * The returned pointer is non-owning; the child is owned by this node.
+     */
+    ExclusionNode* addOrGetChild(FieldPath field);
+
+private:
+    // Helpers for addOrGetChild above. getChild() returns nullptr when no child is registered
+    // under 'field'; addChild() requires that no child is registered under 'field' yet.
+    ExclusionNode* getChild(std::string field) const;
+    ExclusionNode* addChild(std::string field);
+
+    // Helper for applyProjection above. Projects documents, projects each element of an array,
+    // and returns any other type of value unchanged.
+    Value applyProjectionToValue(Value val) const;
+
+    // Fields excluded at this level.
+    std::unordered_set<std::string> _excludedFields;
+
+    // Dotted path from the root of the tree to this node; empty for the root.
+    std::string _pathToNode;
+
+    // Child nodes, keyed by the (non-dotted) field name they apply to.
+    std::unordered_map<std::string, std::unique_ptr<ExclusionNode>> _children;
+};
+
+/**
+ * A ParsedExclusionProjection represents a parsed form of the raw BSON specification.
+ *
+ * This class is mostly a wrapper around an ExclusionNode tree. It contains logic to parse a
+ * specification object into the corresponding ExclusionNode tree, but defers most execution logic
+ * to the underlying tree.
+ */
+class ParsedExclusionProjection : public ParsedAggregationProjection {
+public:
+    ParsedExclusionProjection() : ParsedAggregationProjection(), _root(new ExclusionNode()) {}
+
+    ProjectionType getType() const final {
+        return ProjectionType::kExclusion;
+    }
+
+    /**
+     * Serializes the exclusion tree back into a projection specification document.
+     */
+    Document serialize(bool explain = false) const final;
+
+    /**
+     * Parses the projection specification given by 'spec', populating internal data structures.
+     */
+    void parse(const BSONObj& spec) final {
+        parse(spec, _root.get(), 0);
+    }
+
+    /**
+     * Exclude the fields specified.
+     */
+    Document applyProjection(Document inputDoc) const final;
+
+private:
+    /**
+     * Helper for parse() above.
+     *
+     * Traverses 'spec' and parses each field. Adds any excluded fields at this level to 'node',
+     * and recurses on any sub-objects. 'depth' is 0 for the top-level specification and grows by
+     * one for every level of nesting.
+     */
+    void parse(const BSONObj& spec, ExclusionNode* node, size_t depth);
+
+    // The ExclusionNode tree does most of the execution work once constructed.
+    std::unique_ptr<ExclusionNode> _root;
+};
+
+} // namespace parsed_aggregation_projection
+} // namespace mongo
diff --git a/src/mongo/db/pipeline/parsed_exclusion_projection_test.cpp b/src/mongo/db/pipeline/parsed_exclusion_projection_test.cpp
new file mode 100644
index 00000000000..2817d440fed
--- /dev/null
+++ b/src/mongo/db/pipeline/parsed_exclusion_projection_test.cpp
@@ -0,0 +1,287 @@
+/**
+ * Copyright (C) 2016 MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/pipeline/parsed_exclusion_projection.h"
+
+#include <iostream>
+#include <iterator>
+#include <string>
+
+#include "mongo/bson/bsonmisc.h"
+#include "mongo/bson/bsonobjbuilder.h"
+#include "mongo/bson/json.h"
+#include "mongo/db/pipeline/dependencies.h"
+#include "mongo/db/pipeline/document.h"
+#include "mongo/db/pipeline/value.h"
+#include "mongo/unittest/death_test.h"
+#include "mongo/unittest/unittest.h"
+
+namespace mongo {
+namespace parsed_aggregation_projection {
+namespace {
+using std::vector;
+
+//
+// Errors.
+//
+// ParsedExclusionProjection::parse() relies on invariants rather than user-facing errors, so
+// these cases are death tests. The last macro argument is the failure text each test expects.
+//
+
+DEATH_TEST(ExclusionProjection,
+           ShouldRejectComputedField,
+           "Invariant failure fieldName[0] != '$'") {
+    ParsedExclusionProjection exclusion;
+    // Top-level expression.
+    exclusion.parse(BSON("a" << false << "b" << BSON("$literal" << 1)));
+}
+
+DEATH_TEST(ExclusionProjection,
+           ShouldFailWhenGivenIncludedField,
+           "Invariant failure !elem.trueValue()") {
+    ParsedExclusionProjection exclusion;
+    exclusion.parse(BSON("a" << true));
+}
+
+DEATH_TEST(ExclusionProjection,
+           ShouldFailWhenGivenIncludedId,
+           "Invariant failure !elem.trueValue()") {
+    ParsedExclusionProjection exclusion;
+    exclusion.parse(BSON("_id" << true << "a" << false));
+}
+
+TEST(ExclusionProjection, ShouldSerializeToEquivalentProjection) {
+    ParsedExclusionProjection exclusion;
+    exclusion.parse(
+        fromjson("{a: 0, b: {c: NumberLong(0), d: 0.0}, 'x.y': false, _id: NumberInt(0)}"));
+
+    // Converts numbers to bools, converts dotted paths to nested documents. Note order of excluded
+    // fields is subject to change.
+    auto serialization = exclusion.serialize();
+    ASSERT_EQ(serialization.size(), 4UL);
+    ASSERT_EQ(serialization["a"], Value(false));
+    ASSERT_EQ(serialization["_id"], Value(false));
+
+    ASSERT_EQ(serialization["b"].getType(), BSONType::Object);
+    ASSERT_EQ(serialization["b"].getDocument().size(), 2UL);
+    ASSERT_EQ(serialization["b"].getDocument()["c"], Value(false));
+    ASSERT_EQ(serialization["b"].getDocument()["d"], Value(false));
+
+    ASSERT_EQ(serialization["x"].getType(), BSONType::Object);
+    ASSERT_EQ(serialization["x"].getDocument().size(), 1UL);
+    ASSERT_EQ(serialization["x"].getDocument()["y"], Value(false));
+}
+
+TEST(ExclusionProjection, ShouldNotAddAnyDependencies) {
+    // An exclusion projection will cause the $project stage to return GetDepsReturn::SEE_NEXT,
+    // meaning it doesn't strictly require any fields.
+    //
+    // For example, if our projection was {a: 0}, and a later stage requires the field "a", then "a"
+    // will be added to the dependencies correctly. If a later stage doesn't need "a", then we don't
+    // need to include the "a" in the dependencies of this projection, since it will just be ignored
+    // later. If there are no later stages, then we will finish the dependency computation
+    // cycle without full knowledge of which fields are needed, and thus include all the fields.
+    ParsedExclusionProjection exclusion;
+    exclusion.parse(BSON("_id" << false << "a" << false << "b.c" << false << "x.y.z" << false));
+
+    DepsTracker deps;
+    exclusion.addDependencies(&deps);
+
+    ASSERT_EQ(deps.fields.size(), 0UL);
+    ASSERT_FALSE(deps.needWholeDocument);
+    ASSERT_FALSE(deps.needTextScore);
+}
+
+//
+// Tests of execution of exclusions at the top level.
+//
+
+TEST(ExclusionProjectionExecutionTest, ShouldExcludeTopLevelField) {
+    ParsedExclusionProjection exclusion;
+    exclusion.parse(BSON("a" << false));
+
+    // More than one field in document.
+    auto result = exclusion.applyProjection(Document{{"a", 1}, {"b", 2}});
+    auto expectedResult = Document{{"b", 2}};
+    ASSERT_EQ(result, expectedResult);
+
+    // Specified field is the only field in the document.
+    result = exclusion.applyProjection(Document{{"a", 1}});
+    expectedResult = Document{};
+    ASSERT_EQ(result, expectedResult);
+
+    // Specified field is not present in the document.
+    result = exclusion.applyProjection(Document{{"c", 1}});
+    expectedResult = Document{{"c", 1}};
+    ASSERT_EQ(result, expectedResult);
+
+    // There are no fields in the document.
+    result = exclusion.applyProjection(Document{});
+    expectedResult = Document{};
+    ASSERT_EQ(result, expectedResult);
+}
+
+TEST(ExclusionProjectionExecutionTest, ShouldCoerceNumericsToBools) {
+    ParsedExclusionProjection exclusion;
+    exclusion.parse(BSON("a" << Value(0) << "b" << Value(0LL) << "c" << Value(0.0) << "d"
+                             << Value(Decimal128(0))));
+
+    // Every excluded field, including the Decimal128-specified "d", is present in the input so
+    // that each numeric exclusion is observable in the result.
+    auto result = exclusion.applyProjection(
+        Document{{"_id", "ID"}, {"a", 1}, {"b", 2}, {"c", 3}, {"d", 4}});
+    auto expectedResult = Document{{"_id", "ID"}};
+    ASSERT_EQ(result, expectedResult);
+}
+
+TEST(ExclusionProjectionExecutionTest, ShouldPreserveOrderOfExistingFields) {
+    ParsedExclusionProjection exclusion;
+    exclusion.parse(BSON("second" << false));
+    auto result = exclusion.applyProjection(Document{{"first", 0}, {"second", 1}, {"third", 2}});
+    auto expectedResult = Document{{"first", 0}, {"third", 2}};
+    ASSERT_EQ(result, expectedResult);
+}
+
+TEST(ExclusionProjectionExecutionTest, ShouldImplicitlyIncludeId) {
+    ParsedExclusionProjection exclusion;
+    exclusion.parse(BSON("a" << false));
+    auto result = exclusion.applyProjection(Document{{"a", 1}, {"b", 2}, {"_id", "ID"}});
+    auto expectedResult = Document{{"b", 2}, {"_id", "ID"}};
+    ASSERT_EQ(result, expectedResult);
+}
+
+TEST(ExclusionProjectionExecutionTest, ShouldExcludeIdIfExplicitlyExcluded) {
+    ParsedExclusionProjection exclusion;
+    exclusion.parse(BSON("a" << false << "_id" << false));
+    auto result = exclusion.applyProjection(Document{{"a", 1}, {"b", 2}, {"_id", "ID"}});
+    auto expectedResult = Document{{"b", 2}};
+    ASSERT_EQ(result, expectedResult);
+}
+
+//
+// Tests of execution of nested exclusions.
+//
+
+TEST(ExclusionProjectionExecutionTest, ShouldExcludeSubFieldsOfId) {
+    ParsedExclusionProjection exclusion;
+    // One sub-field is given in dotted form and the other in nested form.
+    exclusion.parse(BSON("_id.x" << false << "_id" << BSON("y" << false)));
+    auto result = exclusion.applyProjection(
+        Document{{"_id", Document{{"x", 1}, {"y", 2}, {"z", 3}}}, {"a", 1}});
+    auto expectedResult = Document{{"_id", Document{{"z", 3}}}, {"a", 1}};
+    ASSERT_EQ(result, expectedResult);
+}
+
+TEST(ExclusionProjectionExecutionTest, ShouldExcludeSimpleDottedFieldFromSubDoc) {
+    ParsedExclusionProjection exclusion;
+    exclusion.parse(BSON("a.b" << false));
+
+    // More than one field in sub document.
+    auto result = exclusion.applyProjection(Document{{"a", Document{{"b", 1}, {"c", 2}}}});
+    auto expectedResult = Document{{"a", Document{{"c", 2}}}};
+    ASSERT_EQ(result, expectedResult);
+
+    // Specified field is the only field in the sub document.
+    result = exclusion.applyProjection(Document{{"a", Document{{"b", 1}}}});
+    expectedResult = Document{{"a", Document{}}};
+    ASSERT_EQ(result, expectedResult);
+
+    // Specified field is not present in the sub document.
+    result = exclusion.applyProjection(Document{{"a", Document{{"c", 1}}}});
+    expectedResult = Document{{"a", Document{{"c", 1}}}};
+    ASSERT_EQ(result, expectedResult);
+
+    // There are no fields in sub document.
+    result = exclusion.applyProjection(Document{{"a", Document{}}});
+    expectedResult = Document{{"a", Document{}}};
+    ASSERT_EQ(result, expectedResult);
+}
+
+TEST(ExclusionProjectionExecutionTest, ShouldNotCreateSubDocIfDottedExcludedFieldDoesNotExist) {
+    ParsedExclusionProjection exclusion;
+    exclusion.parse(BSON("sub.target" << false));
+
+    // Should not add the path if it doesn't exist.
+    auto result = exclusion.applyProjection(Document{});
+    auto expectedResult = Document{};
+    ASSERT_EQ(result, expectedResult);
+
+    // Should not replace non-documents with documents.
+    result = exclusion.applyProjection(Document{{"sub", "notADocument"}});
+    expectedResult = Document{{"sub", "notADocument"}};
+    ASSERT_EQ(result, expectedResult);
+}
+
+TEST(ExclusionProjectionExecutionTest, ShouldApplyDottedExclusionToEachElementInArray) {
+    ParsedExclusionProjection exclusion;
+    exclusion.parse(BSON("a.b" << false));
+
+    // Covers scalars, empty and non-empty documents, an empty array and a nested array: "b" is
+    // removed from every document that is reachable, everything else is left alone.
+    std::vector<Value> nestedValues = {
+        Value(1),
+        Value(Document{}),
+        Value(Document{{"b", 1}}),
+        Value(Document{{"b", 1}, {"c", 2}}),
+        Value(vector<Value>{}),
+        Value(vector<Value>{Value(1), Value(Document{{"c", 1}, {"b", 1}})})};
+    std::vector<Value> expectedNestedValues = {
+        Value(1),
+        Value(Document{}),
+        Value(Document{}),
+        Value(Document{{"c", 2}}),
+        Value(vector<Value>{}),
+        Value(vector<Value>{Value(1), Value(Document{{"c", 1}})})};
+    auto result = exclusion.applyProjection(Document{{"a", nestedValues}});
+    auto expectedResult = Document{{"a", expectedNestedValues}};
+    ASSERT_EQ(result, expectedResult);
+}
+
+TEST(ExclusionProjectionExecutionTest, ShouldAllowMixedNestedAndDottedFields) {
+    ParsedExclusionProjection exclusion;
+    // Exclude all of "a.b", "a.c", "a.d", and "a.e".
+    exclusion.parse(
+        BSON("a.b" << false << "a.c" << false << "a" << BSON("d" << false << "e" << false)));
+    auto result = exclusion.applyProjection(
+        Document{{"a", Document{{"b", 1}, {"c", 2}, {"d", 3}, {"e", 4}, {"f", 5}}}});
+    auto expectedResult = Document{{"a", Document{{"f", 5}}}};
+    ASSERT_EQ(result, expectedResult);
+}
+
+TEST(ExclusionProjectionExecutionTest, ShouldAlwaysKeepMetadataFromOriginalDoc) {
+    ParsedExclusionProjection exclusion;
+    exclusion.parse(BSON("a" << false));
+
+    MutableDocument inputDocBuilder(Document{{"_id", "ID"}, {"a", 1}});
+    inputDocBuilder.setRandMetaField(1.0);
+    inputDocBuilder.setTextScore(10.0);
+    Document inputDoc = inputDocBuilder.freeze();
+
+    auto result = exclusion.applyProjection(inputDoc);
+
+    // The expected document carries the same metadata as the input, minus the excluded field.
+    MutableDocument expectedDoc(Document{{"_id", "ID"}});
+    expectedDoc.copyMetaDataFrom(inputDoc);
+    ASSERT_EQ(result, expectedDoc.freeze());
+}
+
+} // namespace
+} // namespace parsed_aggregation_projection
+} // namespace mongo
diff --git a/src/mongo/db/pipeline/parsed_inclusion_projection.cpp b/src/mongo/db/pipeline/parsed_inclusion_projection.cpp
new file mode 100644
index 00000000000..69d600f5551
--- /dev/null
+++ b/src/mongo/db/pipeline/parsed_inclusion_projection.cpp
@@ -0,0 +1,380 @@
+/**
+ * Copyright (C) 2016 MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/pipeline/parsed_inclusion_projection.h" + +#include <algorithm> + +namespace mongo { + +namespace parsed_aggregation_projection { + +using std::string; +using std::unique_ptr; + +// +// InclusionNode +// + +InclusionNode::InclusionNode(std::string pathToNode) : _pathToNode(std::move(pathToNode)) {} + +void InclusionNode::optimize() { + for (auto&& expressionIt : _expressions) { + _expressions[expressionIt.first] = expressionIt.second->optimize(); + } + for (auto&& childPair : _children) { + childPair.second->optimize(); + } +} + +void InclusionNode::serialize(MutableDocument* output, bool explain) const { + // Always put "_id" first if it was included (implicitly or explicitly). + if (_inclusions.find("_id") != _inclusions.end()) { + output->addField("_id", Value(true)); + } + + for (auto&& includedField : _inclusions) { + if (includedField == "_id") { + // Handled above. 
+ continue; + } + output->addField(includedField, Value(true)); + } + + for (auto&& field : _orderToProcessAdditionsAndChildren) { + auto childIt = _children.find(field); + if (childIt != _children.end()) { + MutableDocument subDoc; + childIt->second->serialize(&subDoc, explain); + output->addField(field, subDoc.freezeToValue()); + } else { + auto expressionIt = _expressions.find(field); + invariant(expressionIt != _expressions.end()); + output->addField(field, expressionIt->second->serialize(explain)); + } + } +} + +void InclusionNode::addDependencies(DepsTracker* deps) const { + for (auto&& includedField : _inclusions) { + deps->fields.insert(FieldPath::getFullyQualifiedPath(_pathToNode, includedField)); + } + + if (!_pathToNode.empty() && !_expressions.empty()) { + // The shape of any computed fields in the output will change depending on if the field is + // an array or not, so in addition to any dependencies of the expression itself, we need to + // add this field to our dependencies. 
+ deps->fields.insert(_pathToNode); + } + + for (auto&& expressionPair : _expressions) { + expressionPair.second->addDependencies(deps); + } + for (auto&& childPair : _children) { + childPair.second->addDependencies(deps); + } +} + +void InclusionNode::applyInclusions(Document inputDoc, MutableDocument* outputDoc) const { + auto it = inputDoc.fieldIterator(); + while (it.more()) { + auto fieldPair = it.next(); + auto fieldName = fieldPair.first.toString(); + if (_inclusions.find(fieldName) != _inclusions.end()) { + outputDoc->addField(fieldName, fieldPair.second); + continue; + } + + auto childIt = _children.find(fieldName); + if (childIt != _children.end()) { + outputDoc->addField(fieldName, + childIt->second->applyInclusionsToValue(fieldPair.second)); + } + } +} + +Value InclusionNode::applyInclusionsToValue(Value inputValue) const { + if (inputValue.getType() == BSONType::Object) { + MutableDocument output; + applyInclusions(inputValue.getDocument(), &output); + return output.freezeToValue(); + } else if (inputValue.getType() == BSONType::Array) { + std::vector<Value> values = inputValue.getArray(); + for (auto it = values.begin(); it != values.end(); ++it) { + *it = applyInclusionsToValue(*it); + } + return Value(std::move(values)); + } else { + // This represents the case where we are including children of a field which does not have + // any children. e.g. applying the projection {"a.b": true} to the document {a: 2}. It is + // somewhat weird, but our semantics are to return a document without the field "a". To do + // so, we return the "missing" value here. 
+ return Value(); + } +} + +void InclusionNode::addComputedFields(MutableDocument* outputDoc, Variables* vars) const { + for (auto&& field : _orderToProcessAdditionsAndChildren) { + auto childIt = _children.find(field); + if (childIt != _children.end()) { + outputDoc->setField(field, + childIt->second->addComputedFields(outputDoc->peek()[field], vars)); + } else { + auto expressionIt = _expressions.find(field); + invariant(expressionIt != _expressions.end()); + outputDoc->setField(field, expressionIt->second->evaluate(vars)); + } + } +} + +Value InclusionNode::addComputedFields(Value inputValue, Variables* vars) const { + if (inputValue.getType() == BSONType::Object) { + MutableDocument outputDoc(inputValue.getDocument()); + addComputedFields(&outputDoc, vars); + return outputDoc.freezeToValue(); + } else if (inputValue.getType() == BSONType::Array) { + std::vector<Value> values = inputValue.getArray(); + for (auto it = values.begin(); it != values.end(); ++it) { + *it = addComputedFields(*it, vars); + } + return Value(std::move(values)); + } else { + if (subtreeContainsComputedFields()) { + // Our semantics in this case are to replace whatever existing value we find with a new + // document of all the computed values. This case represents applying a projection like + // {"a.b": {$literal: 1}} to the document {a: 1}. This should yield {a: {b: 1}}. + MutableDocument outputDoc; + addComputedFields(&outputDoc, vars); + return outputDoc.freezeToValue(); + } + // We didn't have any expressions, so just return the missing value. 
+ return Value(); + } +} + +bool InclusionNode::subtreeContainsComputedFields() const { + return (!_expressions.empty()) || + std::any_of( + _children.begin(), + _children.end(), + [](const std::pair<const std::string, std::unique_ptr<InclusionNode>>& childPair) { + return childPair.second->subtreeContainsComputedFields(); + }); +} + +void InclusionNode::addComputedField(const FieldPath& path, boost::intrusive_ptr<Expression> expr) { + if (path.getPathLength() == 1) { + auto fieldName = path.fullPath(); + _expressions[fieldName] = expr; + _orderToProcessAdditionsAndChildren.push_back(fieldName); + return; + } + addOrGetChild(path.getFieldName(0))->addComputedField(path.tail(), expr); +} + +void InclusionNode::addIncludedField(const FieldPath& path) { + if (path.getPathLength() == 1) { + _inclusions.insert(path.fullPath()); + return; + } + addOrGetChild(path.getFieldName(0))->addIncludedField(path.tail()); +} + +InclusionNode* InclusionNode::addOrGetChild(std::string field) { + auto child = getChild(field); + return child ? child : addChild(field); +} + +InclusionNode* InclusionNode::getChild(string field) const { + auto childIt = _children.find(field); + return childIt == _children.end() ? nullptr : childIt->second.get(); +} + +InclusionNode* InclusionNode::addChild(string field) { + invariant(!str::contains(field, ".")); + _orderToProcessAdditionsAndChildren.push_back(field); + auto childPath = FieldPath::getFullyQualifiedPath(_pathToNode, field); + auto insertedPair = _children.emplace( + std::make_pair(std::move(field), stdx::make_unique<InclusionNode>(std::move(childPath)))); + return insertedPair.first->second.get(); +} + +// +// ParsedInclusionProjection +// + +void ParsedInclusionProjection::parse(const BSONObj& spec, + const VariablesParseState& variablesParseState) { + // It is illegal to specify a projection with no output fields. + bool atLeastOneFieldInOutput = false; + + // Tracks whether or not we should implicitly include "_id". 
+ bool idSpecified = false; + + for (auto elem : spec) { + auto fieldName = elem.fieldNameStringData(); + idSpecified = idSpecified || fieldName == "_id" || fieldName.startsWith("_id."); + if (fieldName == "_id") { + const bool idIsExcluded = (!elem.trueValue() && (elem.isNumber() || elem.isBoolean())); + if (idIsExcluded) { + // Ignoring "_id" here will cause it to be excluded from result documents. + _idExcluded = true; + continue; + } + + // At least part of "_id" is included or a computed field. Fall through to below to + // parse what exactly "_id" was specified as. + } + + atLeastOneFieldInOutput = true; + switch (elem.type()) { + case BSONType::Bool: + case BSONType::NumberInt: + case BSONType::NumberLong: + case BSONType::NumberDouble: + case BSONType::NumberDecimal: { + // This is an inclusion specification. + invariant(elem.trueValue()); + _root->addIncludedField(FieldPath(elem.fieldName())); + break; + } + case BSONType::Object: { + // This is either an expression, or a nested specification. + if (parseObjectAsExpression(fieldName, elem.Obj(), variablesParseState)) { + // It was an expression. + break; + } + + // The field name might be a dotted path. If so, we need to keep adding children + // to our tree until we create a child that represents that path. + auto remainingPath = FieldPath(elem.fieldName()); + auto child = _root.get(); + while (remainingPath.getPathLength() > 1) { + child = child->addOrGetChild(remainingPath.getFieldName(0)); + remainingPath = remainingPath.tail(); + } + // It is illegal to construct an empty FieldPath, so the above loop ends one + // iteration too soon. Add the last path here. + child = child->addOrGetChild(remainingPath.fullPath()); + + parseSubObject(elem.Obj(), variablesParseState, child); + break; + } + default: { + // This is a literal value. 
+ _root->addComputedField(FieldPath(elem.fieldName()), + Expression::parseOperand(elem, variablesParseState)); + } + } + } + + if (!idSpecified) { + // "_id" wasn't specified, so include it by default. + atLeastOneFieldInOutput = true; + _root->addIncludedField(FieldPath("_id")); + } + + uassert(16403, + str::stream() << "$project requires at least one output field: " << spec.toString(), + atLeastOneFieldInOutput); +} + +Document ParsedInclusionProjection::applyProjection(Document inputDoc, Variables* vars) const { + // All expressions will be evaluated in the context of the input document, before any + // transformations have been applied. + vars->setRoot(inputDoc); + + MutableDocument output; + _root->applyInclusions(inputDoc, &output); + _root->addComputedFields(&output, vars); + + // Always pass through the metadata. + output.copyMetaDataFrom(inputDoc); + return output.freeze(); +} + +bool ParsedInclusionProjection::parseObjectAsExpression( + StringData pathToObject, + const BSONObj& objSpec, + const VariablesParseState& variablesParseState) { + if (objSpec.firstElementFieldName()[0] == '$') { + // This is an expression like {$add: [...]}. We have already verified that it has only one + // field. + invariant(objSpec.nFields() == 1); + _root->addComputedField(pathToObject.toString(), + Expression::parseExpression(objSpec, variablesParseState)); + return true; + } + return false; +} + +void ParsedInclusionProjection::parseSubObject(const BSONObj& subObj, + const VariablesParseState& variablesParseState, + InclusionNode* node) { + for (auto elem : subObj) { + invariant(elem.fieldName()[0] != '$'); + // Dotted paths in a sub-object have already been disallowed in + // ParsedAggregationProjection's parsing. 
+ invariant(elem.fieldNameStringData().find('.') == std::string::npos); + + switch (elem.type()) { + case BSONType::Bool: + case BSONType::NumberInt: + case BSONType::NumberLong: + case BSONType::NumberDouble: + case BSONType::NumberDecimal: { + // This is an inclusion specification. + invariant(elem.trueValue()); + node->addIncludedField(FieldPath(elem.fieldName())); + break; + } + case BSONType::Object: { + // This is either an expression, or a nested specification. + auto fieldName = elem.fieldNameStringData().toString(); + if (parseObjectAsExpression( + FieldPath::getFullyQualifiedPath(node->getPath(), fieldName), + elem.Obj(), + variablesParseState)) { + break; + } + auto child = node->addOrGetChild(fieldName); + parseSubObject(elem.Obj(), variablesParseState, child); + break; + } + default: { + // This is a literal value. + node->addComputedField(FieldPath(elem.fieldName()), + Expression::parseOperand(elem, variablesParseState)); + } + } + } +} +} // namespace parsed_aggregation_projection +} // namespace mongo diff --git a/src/mongo/db/pipeline/parsed_inclusion_projection.h b/src/mongo/db/pipeline/parsed_inclusion_projection.h new file mode 100644 index 00000000000..7236d8f85eb --- /dev/null +++ b/src/mongo/db/pipeline/parsed_inclusion_projection.h @@ -0,0 +1,263 @@ +/** + * Copyright (C) 2016 MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include <memory> +#include <unordered_map> +#include <unordered_set> + +#include "mongo/db/pipeline/expression.h" +#include "mongo/db/pipeline/parsed_aggregation_projection.h" +#include "mongo/stdx/memory.h" + +namespace mongo { + +class FieldPath; +class Value; + +namespace parsed_aggregation_projection { + +/** + * A node used to define the parsed structure of an inclusion projection. Each InclusionNode + * represents one 'level' of the parsed specification. The root InclusionNode represents all top + * level inclusions or additions, with any child InclusionNodes representing dotted or nested + * inclusions or additions. + */ +class InclusionNode { +public: + InclusionNode(std::string pathToNode = ""); + + /** + * Optimize any computed expressions. + */ + void optimize(); + + /** + * Serialize this projection. + */ + void serialize(MutableDocument* output, bool explain) const; + + /** + * Adds dependencies of any fields that need to be included, or that are used by any + * expressions. + */ + void addDependencies(DepsTracker* deps) const; + + /** + * Loops over 'inputDoc', extracting and appending any included fields into 'outputDoc'. 
This + * will also copy over enough information to preserve the structure of the incoming document for + * all the fields this projection cares about. + * + * For example, given an InclusionNode tree representing this projection: + * {a: {b: 1, c: <exp>}, "d.e": <exp>} + * calling applyInclusions() with an 'inputDoc' of + * {a: [{b: 1, d: 1}, {b: 2, d: 2}], d: [{e: 1, f: 1}, {e: 1, f: 1}]} + * and an empty 'outputDoc' will leave 'outputDoc' representing the document + * {a: [{b: 1}, {b: 2}], d: [{}, {}]}. + */ + void applyInclusions(Document inputDoc, MutableDocument* outputDoc) const; + + /** + * Add computed fields to 'outputDoc'. 'vars' is passed through to be used in Expression + * evaluation. + */ + void addComputedFields(MutableDocument* outputDoc, Variables* vars) const; + + /** + * Creates the child if it doesn't already exist. 'field' is not allowed to be dotted. + */ + InclusionNode* addOrGetChild(std::string field); + + /** + * Recursively adds 'path' into the tree as a computed field, creating any child nodes if + * necessary. + * + * 'path' is allowed to be dotted, and is assumed not to conflict with another path already in + * the tree. For example, it is an error to add the path "a.b" as a computed field to a tree + * which has already included the field "a". + */ + void addComputedField(const FieldPath& path, boost::intrusive_ptr<Expression> expr); + + /** + * Recursively adds 'path' into the tree as an included field, creating any child nodes if + * necessary. + * + * 'path' is allowed to be dotted, and is assumed not to conflict with another path already in + * the tree. For example, it is an error to include the path "a.b" from a tree which has already + * added a computed field "a". + */ + void addIncludedField(const FieldPath& path); + + std::string getPath() const { + return _pathToNode; + } + +private: + // Helpers for the Document versions above. 
These will apply the transformation recursively to + // each element of any arrays, and ensure non-documents are handled appropriately. + Value applyInclusionsToValue(Value inputVal) const; + Value addComputedFields(Value inputVal, Variables* vars) const; + + /** + * Returns nullptr if no such child exists. + */ + InclusionNode* getChild(std::string field) const; + + /** + * Adds a new InclusionNode as a child. 'field' cannot be dotted. + */ + InclusionNode* addChild(std::string field); + + /** + * Returns true if this node or any child of this node contains a computed field. + */ + bool subtreeContainsComputedFields() const; + + std::string _pathToNode; + + // Our projection semantics are such that all field additions need to be processed in the order + // specified. '_orderToProcessAdditionsAndChildren' tracks that order. + // + // For example, for the specification {a: <expression>, "b.c": <expression>, d: <expression>}, + // we need to add the top level fields in the order "a", then "b", then "d". This ordering + // information needs to be tracked separately, since "a" and "d" will be tracked via + // '_expressions', and "b.c" will be tracked as a child InclusionNode in '_children'. For the + // example above, '_orderToProcessAdditionsAndChildren' would be ["a", "b", "d"]. + std::vector<std::string> _orderToProcessAdditionsAndChildren; + + StringMap<boost::intrusive_ptr<Expression>> _expressions; + std::unordered_set<std::string> _inclusions; + + // TODO use StringMap once SERVER-23700 is resolved. + std::unordered_map<std::string, std::unique_ptr<InclusionNode>> _children; +}; + +/** + * A ParsedInclusionProjection represents a parsed form of the raw BSON specification. + * + * This class is mostly a wrapper around an InclusionNode tree. It contains logic to parse a + * specification object into the corresponding InclusionNode tree, but defers most execution logic + * to the underlying tree. 
+ */ +class ParsedInclusionProjection : public ParsedAggregationProjection { +public: + ParsedInclusionProjection() : ParsedAggregationProjection(), _root(new InclusionNode()) {} + + ProjectionType getType() const final { + return ProjectionType::kInclusion; + } + + /** + * Parses the projection specification given by 'spec', populating internal data structures. + */ + void parse(const BSONObj& spec) final { + VariablesIdGenerator idGenerator; + VariablesParseState variablesParseState(&idGenerator); + parse(spec, variablesParseState); + _variables = stdx::make_unique<Variables>(idGenerator.getIdCount()); + } + + /** + * Serialize the projection. + */ + Document serialize(bool explain = false) const final { + MutableDocument output; + if (_idExcluded) { + output.addField("_id", Value(false)); + } + _root->serialize(&output, explain); + return output.freeze(); + } + + /** + * Optimize any computed expressions. + */ + void optimize() final { + _root->optimize(); + } + + void addDependencies(DepsTracker* deps) const final { + _root->addDependencies(deps); + } + + /** + * Apply this inclusion projection to 'inputDoc'. + * + * All inclusions are processed before all computed fields. Computed fields will be added + * afterwards in the order in which they were specified to the $project stage. + * + * Arrays will be traversed, with any dotted/nested inclusions or computed fields applied to + * each element in the array. + */ + Document applyProjection(Document inputDoc) const final { + _variables->setRoot(inputDoc); + return applyProjection(inputDoc, _variables.get()); + } + + Document applyProjection(Document inputDoc, Variables* vars) const; + +private: + /** + * Parses 'spec' to determine which fields to include, which are computed, and whether to + * include '_id' or not. + */ + void parse(const BSONObj& spec, const VariablesParseState& variablesParseState); + + /** + * Attempts to parse 'objSpec' as an expression like {$add: [...]}.
Adds a computed field to + * '_root' and returns true if it was successfully parsed as an expression. Returns false if it + * was not an expression specification. + * + * Throws an error if it was determined to be an expression specification, but failed to parse + * as a valid expression. + */ + bool parseObjectAsExpression(StringData pathToObject, + const BSONObj& objSpec, + const VariablesParseState& variablesParseState); + + /** + * Traverses 'subObj' and parses each field. Adds any included or computed fields at this level + * to 'node'. + */ + void parseSubObject(const BSONObj& subObj, + const VariablesParseState& variablesParseState, + InclusionNode* node); + + // Not strictly necessary to track here, but makes serialization easier. + bool _idExcluded = false; + + // The InclusionNode tree does most of the execution work once constructed. + std::unique_ptr<InclusionNode> _root; + + // This is needed to give the expressions knowledge about the context in which they are being + // executed. + std::unique_ptr<Variables> _variables; +}; +} // namespace parsed_aggregation_projection +} // namespace mongo diff --git a/src/mongo/db/pipeline/parsed_inclusion_projection_test.cpp b/src/mongo/db/pipeline/parsed_inclusion_projection_test.cpp new file mode 100644 index 00000000000..f4c1d6a4a0e --- /dev/null +++ b/src/mongo/db/pipeline/parsed_inclusion_projection_test.cpp @@ -0,0 +1,545 @@ +/** + * Copyright (C) 2016 MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. 
+ * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/pipeline/parsed_inclusion_projection.h" + +#include <vector> + +#include "mongo/bson/bsonmisc.h" +#include "mongo/bson/bsonobjbuilder.h" +#include "mongo/bson/json.h" +#include "mongo/db/pipeline/dependencies.h" +#include "mongo/db/pipeline/document.h" +#include "mongo/db/pipeline/value.h" +#include "mongo/unittest/unittest.h" + +namespace mongo { +namespace parsed_aggregation_projection { +namespace { +using std::vector; + +template <typename T> +BSONObj wrapInLiteral(const T& arg) { + return BSON("$literal" << arg); +} + +TEST(InclusionProjection, ShouldThrowWhenParsingInvalidExpression) { + ParsedInclusionProjection inclusion; + ASSERT_THROWS(inclusion.parse(BSON("a" << BSON("$gt" << BSON("bad" + << "arguments")))), + UserException); +} + +TEST(InclusionProjection, ShouldRejectProjectionWithNoOutputFields) { + ParsedInclusionProjection inclusion; + ASSERT_THROWS(inclusion.parse(BSON("_id" << false)), UserException); +} + +TEST(InclusionProjection, 
ShouldAddIncludedFieldsToDependencies) { + ParsedInclusionProjection inclusion; + inclusion.parse(BSON("_id" << false << "a" << true << "x.y" << true)); + + DepsTracker deps; + inclusion.addDependencies(&deps); + + ASSERT_EQ(deps.fields.size(), 2UL); + ASSERT_EQ(deps.fields.count("_id"), 0UL); + ASSERT_EQ(deps.fields.count("a"), 1UL); + ASSERT_EQ(deps.fields.count("x.y"), 1UL); +} + +TEST(InclusionProjection, ShouldAddIdToDependenciesIfNotSpecified) { + ParsedInclusionProjection inclusion; + inclusion.parse(BSON("a" << true)); + + DepsTracker deps; + inclusion.addDependencies(&deps); + + ASSERT_EQ(deps.fields.size(), 2UL); + ASSERT_EQ(deps.fields.count("_id"), 1UL); + ASSERT_EQ(deps.fields.count("a"), 1UL); +} + +TEST(InclusionProjection, ShouldAddDependenciesOfComputedFields) { + ParsedInclusionProjection inclusion; + inclusion.parse(BSON("a" + << "$a" + << "x" + << "$z")); + + DepsTracker deps; + inclusion.addDependencies(&deps); + + ASSERT_EQ(deps.fields.size(), 3UL); + ASSERT_EQ(deps.fields.count("_id"), 1UL); + ASSERT_EQ(deps.fields.count("a"), 1UL); + ASSERT_EQ(deps.fields.count("z"), 1UL); +} + +TEST(InclusionProjection, ShouldAddPathToDependenciesForNestedComputedFields) { + ParsedInclusionProjection inclusion; + inclusion.parse(BSON("x.y" + << "$z")); + + DepsTracker deps; + inclusion.addDependencies(&deps); + + ASSERT_EQ(deps.fields.size(), 3UL); + // Implicit "_id". + ASSERT_EQ(deps.fields.count("_id"), 1UL); + // Needed by the ExpressionFieldPath. + ASSERT_EQ(deps.fields.count("z"), 1UL); + // Needed to ensure we preserve the structure of the input document. + ASSERT_EQ(deps.fields.count("x"), 1UL); +} + +TEST(InclusionProjection, ShouldSerializeToEquivalentProjection) { + ParsedInclusionProjection inclusion; + inclusion.parse(fromjson("{a: {$add: ['$a', 2]}, b: {d: 3}, 'x.y': {$literal: 4}}")); + + // Adds implicit "_id" inclusion, converts numbers to bools, serializes expressions. 
+ auto expectedSerialization = Document(fromjson( + "{_id: true, a: {$add: [\"$a\", {$const: 2}]}, b: {d: true}, x: {y: {$const: 4}}}")); + + // Should be the same if we're serializing for explain or for internal use. + ASSERT_EQ(expectedSerialization, inclusion.serialize(false)); + ASSERT_EQ(expectedSerialization, inclusion.serialize(true)); +} + +TEST(InclusionProjection, ShouldSerializeExplicitExclusionOfId) { + ParsedInclusionProjection inclusion; + inclusion.parse(BSON("_id" << false << "a" << true)); + + // Keeps the explicit "_id" exclusion, which is serialized ahead of the included fields. + auto expectedSerialization = Document{{"_id", false}, {"a", true}}; + + // Should be the same if we're serializing for explain or for internal use. + ASSERT_EQ(expectedSerialization, inclusion.serialize(false)); + ASSERT_EQ(expectedSerialization, inclusion.serialize(true)); +} + + +TEST(InclusionProjection, ShouldOptimizeTopLevelExpressions) { + ParsedInclusionProjection inclusion; + inclusion.parse(BSON("a" << BSON("$add" << BSON_ARRAY(1 << 2)))); + + inclusion.optimize(); + + auto expectedSerialization = Document{{"_id", true}, {"a", Document{{"$const", 3}}}}; + + // Should be the same if we're serializing for explain or for internal use. + ASSERT_EQ(expectedSerialization, inclusion.serialize(false)); + ASSERT_EQ(expectedSerialization, inclusion.serialize(true)); +} + +TEST(InclusionProjection, ShouldOptimizeNestedExpressions) { + ParsedInclusionProjection inclusion; + inclusion.parse(BSON("a.b" << BSON("$add" << BSON_ARRAY(1 << 2)))); + + inclusion.optimize(); + + auto expectedSerialization = + Document{{"_id", true}, {"a", Document{{"b", Document{{"$const", 3}}}}}}; + + // Should be the same if we're serializing for explain or for internal use. + ASSERT_EQ(expectedSerialization, inclusion.serialize(false)); + ASSERT_EQ(expectedSerialization, inclusion.serialize(true)); +} + +// +// Top-level only. 
+// + +TEST(InclusionProjectionExecutionTest, ShouldIncludeTopLevelField) { + ParsedInclusionProjection inclusion; + inclusion.parse(BSON("a" << true)); + + // More than one field in document. + auto result = inclusion.applyProjection(Document{{"a", 1}, {"b", 2}}); + auto expectedResult = Document{{"a", 1}}; + ASSERT_EQ(result, expectedResult); + + // Specified field is the only field in the document. + result = inclusion.applyProjection(Document{{"a", 1}}); + expectedResult = Document{{"a", 1}}; + ASSERT_EQ(result, expectedResult); + + // Specified field is not present in the document. + result = inclusion.applyProjection(Document{{"c", 1}}); + expectedResult = Document{}; + ASSERT_EQ(result, expectedResult); + + // There are no fields in the document. + result = inclusion.applyProjection(Document{}); + expectedResult = Document{}; + ASSERT_EQ(result, expectedResult); +} + +TEST(InclusionProjectionExecutionTest, ShouldAddComputedTopLevelField) { + ParsedInclusionProjection inclusion; + inclusion.parse(BSON("newField" << wrapInLiteral("computedVal"))); + auto result = inclusion.applyProjection(Document{}); + auto expectedResult = Document{{"newField", "computedVal"}}; + ASSERT_EQ(result, expectedResult); + + // Computed field should replace existing field. 
+ result = inclusion.applyProjection(Document{{"newField", "preExisting"}}); + expectedResult = Document{{"newField", "computedVal"}}; + ASSERT_EQ(result, expectedResult); +} + +TEST(InclusionProjectionExecutionTest, ShouldApplyBothInclusionsAndComputedFields) { + ParsedInclusionProjection inclusion; + inclusion.parse(BSON("a" << true << "newField" << wrapInLiteral("computedVal"))); + auto result = inclusion.applyProjection(Document{{"a", 1}}); + auto expectedResult = Document{{"a", 1}, {"newField", "computedVal"}}; + ASSERT_EQ(result, expectedResult); +} + +TEST(InclusionProjectionExecutionTest, ShouldIncludeFieldsInOrderOfInputDoc) { + ParsedInclusionProjection inclusion; + inclusion.parse(BSON("first" << true << "second" << true << "third" << true)); + auto inputDoc = Document{{"second", 1}, {"first", 0}, {"third", 2}}; + auto result = inclusion.applyProjection(inputDoc); + ASSERT_EQ(result, inputDoc); +} + +TEST(InclusionProjectionExecutionTest, ShouldApplyComputedFieldsInOrderSpecified) { + ParsedInclusionProjection inclusion; + inclusion.parse(BSON("firstComputed" << wrapInLiteral("FIRST") << "secondComputed" + << wrapInLiteral("SECOND"))); + auto result = inclusion.applyProjection(Document{{"first", 0}, {"second", 1}, {"third", 2}}); + auto expectedResult = Document{{"firstComputed", "FIRST"}, {"secondComputed", "SECOND"}}; + ASSERT_EQ(result, expectedResult); +} + +TEST(InclusionProjectionExecutionTest, ShouldImplicitlyIncludeId) { + ParsedInclusionProjection inclusion; + inclusion.parse(BSON("a" << true)); + auto result = inclusion.applyProjection(Document{{"_id", "ID"}, {"a", 1}, {"b", 2}}); + auto expectedResult = Document{{"_id", "ID"}, {"a", 1}}; + ASSERT_EQ(result, expectedResult); + + // Should leave the "_id" in the same place as in the original document. 
+ result = inclusion.applyProjection(Document{{"a", 1}, {"b", 2}, {"_id", "ID"}}); + expectedResult = Document{{"a", 1}, {"_id", "ID"}}; + ASSERT_EQ(result, expectedResult); +} + +TEST(InclusionProjectionExecutionTest, ShouldImplicitlyIncludeIdWithComputedFields) { + ParsedInclusionProjection inclusion; + inclusion.parse(BSON("newField" << wrapInLiteral("computedVal"))); + auto result = inclusion.applyProjection(Document{{"_id", "ID"}, {"a", 1}}); + auto expectedResult = Document{{"_id", "ID"}, {"newField", "computedVal"}}; + ASSERT_EQ(result, expectedResult); +} + +TEST(InclusionProjectionExecutionTest, ShouldIncludeIdIfExplicitlyIncluded) { + ParsedInclusionProjection inclusion; + inclusion.parse(BSON("a" << true << "_id" << true << "b" << true)); + auto result = inclusion.applyProjection(Document{{"_id", "ID"}, {"a", 1}, {"b", 2}, {"c", 3}}); + auto expectedResult = Document{{"_id", "ID"}, {"a", 1}, {"b", 2}}; + ASSERT_EQ(result, expectedResult); +} + +TEST(InclusionProjectionExecutionTest, ShouldExcludeIdIfExplicitlyExcluded) { + ParsedInclusionProjection inclusion; + inclusion.parse(BSON("a" << true << "_id" << false)); + auto result = inclusion.applyProjection(Document{{"a", 1}, {"b", 2}, {"_id", "ID"}}); + auto expectedResult = Document{{"a", 1}}; + ASSERT_EQ(result, expectedResult); +} + +TEST(InclusionProjectionExecutionTest, ShouldReplaceIdWithComputedId) { + ParsedInclusionProjection inclusion; + inclusion.parse(BSON("_id" << wrapInLiteral("newId"))); + auto result = inclusion.applyProjection(Document{{"a", 1}, {"b", 2}, {"_id", "ID"}}); + auto expectedResult = Document{{"_id", "newId"}}; + ASSERT_EQ(result, expectedResult); +} + +// +// Projections with nested fields. +// + +TEST(InclusionProjectionExecutionTest, ShouldIncludeSimpleDottedFieldFromSubDoc) { + ParsedInclusionProjection inclusion; + inclusion.parse(BSON("a.b" << true)); + + // More than one field in sub document. 
+ auto result = inclusion.applyProjection(Document{{"a", Document{{"b", 1}, {"c", 2}}}}); + auto expectedResult = Document{{"a", Document{{"b", 1}}}}; + ASSERT_EQ(result, expectedResult); + + // Specified field is the only field in the sub document. + result = inclusion.applyProjection(Document{{"a", Document{{"b", 1}}}}); + expectedResult = Document{{"a", Document{{"b", 1}}}}; + ASSERT_EQ(result, expectedResult); + + // Specified field is not present in the sub document. + result = inclusion.applyProjection(Document{{"a", Document{{"c", 1}}}}); + expectedResult = Document{{"a", Document{}}}; + ASSERT_EQ(result, expectedResult); + + // There are no fields in sub document. + result = inclusion.applyProjection(Document{{"a", Document{}}}); + expectedResult = Document{{"a", Document{}}}; + ASSERT_EQ(result, expectedResult); +} + +TEST(InclusionProjectionExecutionTest, ShouldNotCreateSubDocIfDottedIncludedFieldDoesNotExist) { + ParsedInclusionProjection inclusion; + inclusion.parse(BSON("sub.target" << true)); + + // Should not add the path if it doesn't exist. + auto result = inclusion.applyProjection(Document{}); + auto expectedResult = Document{}; + ASSERT_EQ(result, expectedResult); + + // Should not replace the first part of the path if that part exists. + result = inclusion.applyProjection(Document{{"sub", "notADocument"}}); + expectedResult = Document{}; + ASSERT_EQ(result, expectedResult); +} + +TEST(InclusionProjectionExecutionTest, ShouldApplyDottedInclusionToEachElementInArray) { + ParsedInclusionProjection inclusion; + inclusion.parse(BSON("a.b" << true)); + + vector<Value> nestedValues = {Value(1), + Value(Document{}), + Value(Document{{"b", 1}}), + Value(Document{{"b", 1}, {"c", 2}}), + Value(vector<Value>{}), + Value(vector<Value>{Value(1), Value(Document{{"c", 1}})})}; + + // Drops non-documents and non-arrays. Applies projection to documents, recurses on nested + // arrays. 
+ vector<Value> expectedNestedValues = {Value(), + Value(Document{}), + Value(Document{{"b", 1}}), + Value(Document{{"b", 1}}), + Value(vector<Value>{}), + Value(vector<Value>{Value(), Value(Document{})})}; + auto result = inclusion.applyProjection(Document{{"a", nestedValues}}); + auto expectedResult = Document{{"a", expectedNestedValues}}; + ASSERT_EQ(result, expectedResult); +} + +TEST(InclusionProjectionExecutionTest, ShouldAddComputedDottedFieldToSubDocument) { + ParsedInclusionProjection inclusion; + inclusion.parse(BSON("sub.target" << wrapInLiteral("computedVal"))); + + // Other fields exist in sub document, one of which is the specified field. + auto result = inclusion.applyProjection(Document{{"sub", Document{{"target", 1}, {"c", 2}}}}); + auto expectedResult = Document{{"sub", Document{{"target", "computedVal"}}}}; + ASSERT_EQ(result, expectedResult); + + // Specified field is not present in the sub document. + result = inclusion.applyProjection(Document{{"sub", Document{{"c", 1}}}}); + expectedResult = Document{{"sub", Document{{"target", "computedVal"}}}}; + ASSERT_EQ(result, expectedResult); + + // There are no fields in sub document. + result = inclusion.applyProjection(Document{{"sub", Document{}}}); + expectedResult = Document{{"sub", Document{{"target", "computedVal"}}}}; + ASSERT_EQ(result, expectedResult); +} + +TEST(InclusionProjectionExecutionTest, ShouldCreateSubDocIfDottedComputedFieldDoesntExist) { + ParsedInclusionProjection inclusion; + inclusion.parse(BSON("sub.target" << wrapInLiteral("computedVal"))); + + // Should add the path if it doesn't exist. + auto result = inclusion.applyProjection(Document{}); + auto expectedResult = Document{{"sub", Document{{"target", "computedVal"}}}}; + ASSERT_EQ(result, expectedResult); + + // Should replace non-documents with documents. 
+ result = inclusion.applyProjection(Document{{"sub", "notADocument"}}); + ASSERT_EQ(result, expectedResult); +} + +TEST(InclusionProjectionExecutionTest, ShouldCreateNestedSubDocumentsAllTheWayToComputedField) { + ParsedInclusionProjection inclusion; + inclusion.parse(BSON("a.b.c.d" << wrapInLiteral("computedVal"))); + + // Should add the path if it doesn't exist. + auto result = inclusion.applyProjection(Document{}); + auto expectedResult = + Document{{"a", Document{{"b", Document{{"c", Document{{"d", "computedVal"}}}}}}}}; + ASSERT_EQ(result, expectedResult); + + // Should replace non-documents with documents. + result = inclusion.applyProjection(Document{{"a", Document{{"b", "other"}}}}); + ASSERT_EQ(result, expectedResult); +} + +TEST(InclusionProjectionExecutionTest, ShouldAddComputedDottedFieldToEachElementInArray) { + ParsedInclusionProjection inclusion; + inclusion.parse(BSON("a.b" << wrapInLiteral("COMPUTED"))); + + vector<Value> nestedValues = {Value(1), + Value(Document{}), + Value(Document{{"b", 1}}), + Value(Document{{"b", 1}, {"c", 2}}), + Value(vector<Value>{}), + Value(vector<Value>{Value(1), Value(Document{{"c", 1}})})}; + vector<Value> expectedNestedValues = {Value(Document{{"b", "COMPUTED"}}), + Value(Document{{"b", "COMPUTED"}}), + Value(Document{{"b", "COMPUTED"}}), + Value(Document{{"b", "COMPUTED"}}), + Value(vector<Value>{}), + Value(vector<Value>{Value(Document{{"b", "COMPUTED"}}), + Value(Document{{"b", "COMPUTED"}})})}; + auto result = inclusion.applyProjection(Document{{"a", nestedValues}}); + auto expectedResult = Document{{"a", expectedNestedValues}}; + ASSERT_EQ(result, expectedResult); +} + +TEST(InclusionProjectionExecutionTest, ShouldApplyInclusionsAndAdditionsToEachElementInArray) { + ParsedInclusionProjection inclusion; + inclusion.parse(BSON("a.inc" << true << "a.comp" << wrapInLiteral("COMPUTED"))); + + vector<Value> nestedValues = {Value(1), + Value(Document{}), + Value(Document{{"inc", 1}}), + Value(Document{{"inc", 1}, 
{"c", 2}}), + Value(Document{{"c", 2}, {"inc", 1}}), + Value(Document{{"inc", 1}, {"c", 2}, {"comp", "original"}}), + Value(vector<Value>{}), + Value(vector<Value>{Value(1), Value(Document{{"inc", 1}})})}; + vector<Value> expectedNestedValues = { + Value(Document{{"comp", "COMPUTED"}}), + Value(Document{{"comp", "COMPUTED"}}), + Value(Document{{"inc", 1}, {"comp", "COMPUTED"}}), + Value(Document{{"inc", 1}, {"comp", "COMPUTED"}}), + Value(Document{{"inc", 1}, {"comp", "COMPUTED"}}), + Value(Document{{"inc", 1}, {"comp", "COMPUTED"}}), + Value(vector<Value>{}), + Value(vector<Value>{Value(Document{{"comp", "COMPUTED"}}), + Value(Document{{"inc", 1}, {"comp", "COMPUTED"}})})}; + auto result = inclusion.applyProjection(Document{{"a", nestedValues}}); + auto expectedResult = Document{{"a", expectedNestedValues}}; + ASSERT_EQ(result, expectedResult); +} + +TEST(InclusionProjectionExecutionTest, ShouldAddOrIncludeSubFieldsOfId) { + ParsedInclusionProjection inclusion; + inclusion.parse(BSON("_id.X" << true << "_id.Z" << wrapInLiteral("NEW"))); + auto result = inclusion.applyProjection(Document{{"_id", Document{{"X", 1}, {"Y", 2}}}}); + auto expectedResult = Document{{"_id", Document{{"X", 1}, {"Z", "NEW"}}}}; + ASSERT_EQ(result, expectedResult); +} + +TEST(InclusionProjectionExecutionTest, ShouldAllowMixedNestedAndDottedFields) { + ParsedInclusionProjection inclusion; + // Include all of "a.b", "a.c", "a.d", and "a.e". + // Add new computed fields "a.W", "a.X", "a.Y", and "a.Z". 
+ inclusion.parse(BSON( + "a.b" << true << "a.c" << true << "a.W" << wrapInLiteral("W") << "a.X" << wrapInLiteral("X") + << "a" + << BSON("d" << true << "e" << true << "Y" << wrapInLiteral("Y") << "Z" + << wrapInLiteral("Z")))); + auto result = inclusion.applyProjection( + Document{{"a", Document{{"b", "b"}, {"c", "c"}, {"d", "d"}, {"e", "e"}, {"f", "f"}}}}); + auto expectedResult = Document{{"a", + Document{{"b", "b"}, + {"c", "c"}, + {"d", "d"}, + {"e", "e"}, + {"W", "W"}, + {"X", "X"}, + {"Y", "Y"}, + {"Z", "Z"}}}}; + ASSERT_EQ(result, expectedResult); +} + +TEST(InclusionProjectionExecutionTest, ShouldApplyNestedComputedFieldsInOrderSpecified) { + ParsedInclusionProjection inclusion; + inclusion.parse(BSON("a" << wrapInLiteral("FIRST") << "b.c" << wrapInLiteral("SECOND"))); + auto result = inclusion.applyProjection(Document{}); + auto expectedResult = Document{{"a", "FIRST"}, {"b", Document{{"c", "SECOND"}}}}; + ASSERT_EQ(result, expectedResult); +} + +TEST(InclusionProjectionExecutionTest, ShouldApplyComputedFieldsAfterAllInclusions) { + ParsedInclusionProjection inclusion; + inclusion.parse(BSON("b.c" << wrapInLiteral("NEW") << "a" << true)); + auto result = inclusion.applyProjection(Document{{"a", 1}}); + auto expectedResult = Document{{"a", 1}, {"b", Document{{"c", "NEW"}}}}; + ASSERT_EQ(result, expectedResult); + + result = inclusion.applyProjection(Document{{"a", 1}, {"b", 4}}); + ASSERT_EQ(result, expectedResult); + + // In this case, the field 'b' shows up first and has a nested inclusion or computed field. Even + // though it is a computed field, it will appear first in the output document. This is + // inconsistent, but the expected behavior, and a consequence of applying the projection + // recursively to each sub-document. 
+ result = inclusion.applyProjection(Document{{"b", 4}, {"a", 1}}); + expectedResult = Document{{"b", Document{{"c", "NEW"}}}, {"a", 1}}; + ASSERT_EQ(result, expectedResult); +} + +TEST(InclusionProjectionExecutionTest, ComputedFieldReplacingExistingShouldAppearAfterInclusions) { + ParsedInclusionProjection inclusion; + inclusion.parse(BSON("b" << wrapInLiteral("NEW") << "a" << true)); + auto result = inclusion.applyProjection(Document{{"b", 1}, {"a", 1}}); + auto expectedResult = Document{{"a", 1}, {"b", "NEW"}}; + ASSERT_EQ(result, expectedResult); + + result = inclusion.applyProjection(Document{{"a", 1}, {"b", 4}}); + ASSERT_EQ(result, expectedResult); +} + +// +// Misc. +// + +TEST(InclusionProjectionExecutionTest, ShouldAlwaysKeepMetadataFromOriginalDoc) { + ParsedInclusionProjection inclusion; + inclusion.parse(BSON("a" << true)); + + MutableDocument inputDocBuilder(Document{{"a", 1}}); + inputDocBuilder.setRandMetaField(1.0); + inputDocBuilder.setTextScore(10.0); + Document inputDoc = inputDocBuilder.freeze(); + + auto result = inclusion.applyProjection(inputDoc); + + MutableDocument expectedDoc(inputDoc); + expectedDoc.copyMetaDataFrom(inputDoc); + ASSERT_EQ(result, expectedDoc.freeze()); +} + +} // namespace +} // namespace parsed_aggregation_projection +} // namespace mongo diff --git a/src/mongo/db/pipeline/pipeline_test.cpp b/src/mongo/db/pipeline/pipeline_test.cpp index c08278c4737..bd3948243de 100644 --- a/src/mongo/db/pipeline/pipeline_test.cpp +++ b/src/mongo/db/pipeline/pipeline_test.cpp @@ -106,7 +106,7 @@ class MoveSkipBeforeProject : public Base { return "[{$project: {a : 1}}, {$skip : 5}]"; } string outputPipeJson() override { - return "[{$skip : 5}, {$project: {a : true}}]"; + return "[{$skip : 5}, {$project: {_id: true, a : true}}]"; } }; @@ -116,7 +116,7 @@ class MoveLimitBeforeProject : public Base { } string outputPipeJson() override { - return "[{$limit : 5}, {$project: {a : true}}]"; + return "[{$limit : 5}, {$project: {_id: true, a : 
true}}]"; } }; @@ -126,7 +126,7 @@ class MoveMultipleSkipsAndLimitsBeforeProject : public Base { } string outputPipeJson() override { - return "[{$limit : 5}, {$skip : 3}, {$project: {a : true}}]"; + return "[{$limit : 5}, {$skip : 3}, {$project: {_id: true, a : true}}]"; } }; @@ -159,7 +159,7 @@ class SortMatchProjSkipLimBecomesMatchTopKSortSkipProj : public Base { return "[{$match: {a: 1}}" ",{$sort: {sortKey: {a: 1}, limit: 8}}" ",{$skip: 3}" - ",{$project: {a: true}}" + ",{$project: {_id: true, a: true}}" "]"; } @@ -168,7 +168,7 @@ class SortMatchProjSkipLimBecomesMatchTopKSortSkipProj : public Base { ",{$sort: {a: 1}}" ",{$limit: 8}" ",{$skip : 3}" - ",{$project : {a: true}}" + ",{$project : {_id: true, a: true}}" "]"; } }; @@ -926,13 +926,13 @@ class ShardedSortMatchProjSkipLimBecomesMatchTopKSortSkipProj : public Base { string shardPipeJson() { return "[{$match: {a: 1}}" ",{$sort: {sortKey: {a: 1}, limit: 8}}" - ",{$project: {a: true, _id: true}}" + ",{$project: {_id: true, a: true}}" "]"; } string mergePipeJson() { return "[{$sort: {sortKey: {a: 1}, mergePresorted: true, limit: 8}}" ",{$skip: 3}" - ",{$project: {a: true}}" + ",{$project: {_id: true, a: true}}" "]"; } }; @@ -1044,7 +1044,7 @@ class Project : public needsPrimaryShardMergerBase { return "[{$project: {a : 1}}]"; } string shardPipeJson() { - return "[{$project: {a: true}}]"; + return "[{$project: {_id: true, a: true}}]"; } string mergePipeJson() { return "[]"; |