diff options
17 files changed, 1566 insertions, 206 deletions
diff --git a/jstests/aggregation/sources/addFields/use_cases.js b/jstests/aggregation/sources/addFields/use_cases.js
new file mode 100644
index 00000000000..725408fb40d
--- /dev/null
+++ b/jstests/aggregation/sources/addFields/use_cases.js
@@ -0,0 +1,64 @@
+/**
+ * $addFields can be used to add fixed and computed fields to documents while preserving the
+ * original document. Verify that using $addFields and adding computed fields in a $project yield
+ * the same result.
+ */
+
+(function() {
+    "use strict";
+
+    // For arrayEq.
+    load("jstests/aggregation/extras/utils.js");
+
+    const dbName = "test";
+    const collName = jsTest.name();
+
+    function doExecutionTest(conn) {
+        const coll = conn.getDB(dbName).getCollection(collName);
+        coll.drop();
+
+        // Insert documents of the form {_id: i, "2i": 2 * i, "3i": 3 * i}.
+        const nDocs = 10;
+        for (let i = 0; i < nDocs; i++) {
+            assert.writeOK(coll.insert({"_id": i, "2i": i * 2, "3i": i * 3}));
+        }
+
+        // Add the product of the "2i" and "3i" fields, and make sure that doing the same with
+        // $addFields yields the correct answer.
+        // First compute with $project, since we know all the fields in this document.
+        let projectPipe = [{
+            $project: {
+                "2i": 1,
+                "3i": 1,
+                "6i^2": {"$multiply": ["$2i", "$3i"]},
+                // _id is implicitly included.
+            }
+        }];
+        let correct = coll.aggregate(projectPipe).toArray();
+
+        // Then compute the same results using $addFields.
+        let addFieldsPipe = [{
+            $addFields: {
+                "6i^2": {"$multiply": ["$2i", "$3i"]},
+                // All other fields are implicitly included.
+            }
+        }];
+        let addFieldsResult = coll.aggregate(addFieldsPipe).toArray();
+
+        // Then assert they are the same.
+        assert(arrayEq(addFieldsResult, correct),
+               "$addFields does not work the same as a $project with computed and included fields");
+    }
+
+    // Test against the standalone started by resmoke.py.
+    let conn = db.getMongo();
+    doExecutionTest(conn);
+    print("Success! Standalone execution use case test for $addFields passed.");
+
+    // Test against a sharded cluster.
+    let st = new ShardingTest({shards: 2});
+    doExecutionTest(st.s0);
+    st.stop();
+    print("Success! Sharding use case test for $addFields passed.");
+
+}());
\ No newline at end of file
diff --git a/jstests/aggregation/sources/addFields/weather.js b/jstests/aggregation/sources/addFields/weather.js
new file mode 100644
index 00000000000..7e2be837329
--- /dev/null
+++ b/jstests/aggregation/sources/addFields/weather.js
@@ -0,0 +1,101 @@
+/**
+ * $addFields can be used to add fixed and computed fields to documents while preserving the
+ * original document. Verify that using $addFields and adding computed fields in a $project yield
+ * the same result. Use the sample case of computing weather metadata.
+ */
+
+(function() {
+    "use strict";
+
+    // For arrayEq.
+    load("jstests/aggregation/extras/utils.js");
+
+    const dbName = "test";
+    const collName = jsTest.name();
+
+    Random.setRandomSeed();
+
+    /**
+     * Helper to generate an array of 'length' random numbers scaled from 'minValue' to 'maxValue'.
+     */
+    function randomArray(length, minValue, maxValue) {
+        let array = [];
+        for (let i = 0; i < length; i++) {
+            array.push((Random.rand() * (maxValue - minValue)) + minValue);
+        }
+        return array;
+    }
+
+    /**
+     * Helper to generate a randomized document with the following schema:
+     * {
+     *   month: <integer month of year>,
+     *   day: <integer day of month>,
+     *   temperatures: <array of 24 decimal temperatures>
+     * }
+     */
+    function generateRandomDocument() {
+        const minTemp = -40;
+        const maxTemp = 120;
+
+        return {
+            month: Random.randInt(12) + 1,  // 1-12
+            day: Random.randInt(31) + 1,    // 1-31, chosen independently of the month
+            temperatures: randomArray(24, minTemp, maxTemp),
+        };
+    }
+
+    function doExecutionTest(conn) {
+        const coll = conn.getDB(dbName).getCollection(collName);
+        coll.drop();
+
+        // Insert a bunch of documents of the form above.
+        const nDocs = 10;
+        for (let i = 0; i < nDocs; i++) {
+            assert.writeOK(coll.insert(generateRandomDocument()));
+        }
+
+        // Add the minimum, maximum, and average temperatures, and make sure that doing the same
+        // with $addFields yields the correct answer.
+        // First compute with $project, since we know all the fields in this document.
+        let projectWeatherPipe = [{
+            $project: {
+                "month": 1,
+                "day": 1,
+                "temperatures": 1,
+                "minTemp": {"$min": "$temperatures"},
+                "maxTemp": {"$max": "$temperatures"},
+                "average": {"$avg": "$temperatures"},
+                // _id is implicitly included.
+            }
+        }];
+        let correctWeather = coll.aggregate(projectWeatherPipe).toArray();
+
+        // Then compute the same results using $addFields.
+        let addFieldsWeatherPipe = [{
+            $addFields: {
+                "minTemp": {"$min": "$temperatures"},
+                "maxTemp": {"$max": "$temperatures"},
+                "average": {"$avg": "$temperatures"},
+                // All other fields are implicitly included.
+            }
+        }];
+        let addFieldsResult = coll.aggregate(addFieldsWeatherPipe).toArray();
+
+        // Then assert they are the same.
+        assert(arrayEq(addFieldsResult, correctWeather),
+               "$addFields does not work the same as a $project with computed and included fields");
+    }
+
+    // Test against the standalone started by resmoke.py.
+    let conn = db.getMongo();
+    doExecutionTest(conn);
+    print("Success! Standalone execution weather test for $addFields passed.");
+
+    // Test against a sharded cluster.
+    let st = new ShardingTest({shards: 2});
+    doExecutionTest(st.s0);
+    st.stop();
+    print("Success! Sharding weather test for $addFields passed.");
+
+}());
\ No newline at end of file
diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript
index e93717e96d9..8a9fe45f074 100644
--- a/src/mongo/db/pipeline/SConscript
+++ b/src/mongo/db/pipeline/SConscript
@@ -196,6 +196,7 @@ docSourceEnv.Library(
     target='document_source',
     source=[
         'document_source.cpp',
+        'document_source_add_fields.cpp',
         'document_source_bucket.cpp',
         'document_source_bucket_auto.cpp',
         'document_source_coll_stats.cpp',
@@ -213,6 +214,7 @@ docSourceEnv.Library(
         'document_source_replace_root.cpp',
         'document_source_sample.cpp',
         'document_source_sample_from_random_cursor.cpp',
+        'document_source_single_document_transformation.cpp',
         'document_source_skip.cpp',
         'document_source_sort.cpp',
         'document_source_sort_by_count.cpp',
@@ -376,6 +378,7 @@ env.Library(
         'parsed_aggregation_projection.cpp',
         'parsed_exclusion_projection.cpp',
         'parsed_inclusion_projection.cpp',
+        'parsed_add_fields.cpp',
     ],
     LIBDEPS=[
         'expression',
@@ -421,6 +424,15 @@ env.CppUnitTest(
     ],
 )
 
+env.CppUnitTest(
+    target='parsed_add_fields_test',
+    source='parsed_add_fields_test.cpp',
+    LIBDEPS=[
+        'document_value_test_util',
+        'parsed_aggregation_projection',
+    ],
+)
+
 env.Library(
     target='serveronly',
     source=[
diff --git a/src/mongo/db/pipeline/document_source.h b/src/mongo/db/pipeline/document_source.h
index a171281feb0..0cdf70d02ca 100644
--- a/src/mongo/db/pipeline/document_source.h
+++ b/src/mongo/db/pipeline/document_source.h
@@ -51,7 +51,6 @@
 #include "mongo/db/pipeline/expression_context.h"
 #include "mongo/db/pipeline/granularity_rounder.h"
 #include "mongo/db/pipeline/lookup_set_cache.h"
-#include "mongo/db/pipeline/parsed_aggregation_projection.h"
 #include "mongo/db/pipeline/pipeline.h"
 #include "mongo/db/pipeline/value.h"
 #include "mongo/db/pipeline/value_comparator.h"
@@ -989,6 +988,60 @@ public:
     BSONObjSet sorts;
 };
 
+/**
+ * This class is for DocumentSources that take in and return one document at a time, in a 1:1
+ * transformation. It should only be used via an alias that passes the transformation logic through
+ * a ParsedSingleDocumentTransformation. It is not a registered DocumentSource, and it cannot be
+ * created from BSON.
+ */
+class DocumentSourceSingleDocumentTransformation final : public DocumentSource {
+public:
+    /**
+     * This class defines the minimal interface that every parser wishing to take advantage of
+     * DocumentSourceSingleDocumentTransformation must implement.
+     *
+     * This interface ensures that DocumentSourceSingleDocumentTransformations are passed parsed
+     * objects that can execute the transformation, serialize themselves, and report dependencies.
+     * Parsers must also implement optimize() and injectExpressionContext(), even as no-ops.
+     *
+     * SERVER-25509 Make $replaceRoot use this framework.
+     */
+    class TransformerInterface {
+    public:
+        // Implementations are owned and destroyed through a pointer to this interface.
+        virtual ~TransformerInterface() = default;
+        virtual Document applyTransformation(Document input) = 0;
+        virtual void optimize() = 0;
+        virtual Document serialize(bool explain) const = 0;
+        virtual DocumentSource::GetDepsReturn addDependencies(DepsTracker* deps) const = 0;
+        virtual void injectExpressionContext(
+            const boost::intrusive_ptr<ExpressionContext>& pExpCtx) = 0;
+    };
+
+    DocumentSourceSingleDocumentTransformation(
+        const boost::intrusive_ptr<ExpressionContext>& pExpCtx,
+        std::unique_ptr<TransformerInterface> parsedTransform,
+        std::string name);
+
+    // virtuals from DocumentSource
+    const char* getSourceName() const final;
+    boost::optional<Document> getNext() final;
+    boost::intrusive_ptr<DocumentSource> optimize() final;
+    void dispose() final;
+    Value serialize(bool explain) const final;
+    Pipeline::SourceContainer::iterator optimizeAt(Pipeline::SourceContainer::iterator itr,
+                                                   Pipeline::SourceContainer* container) final;
+    void doInjectExpressionContext() final;
+    DocumentSource::GetDepsReturn getDependencies(DepsTracker* deps) const final;
+
+private:
+    // Stores transformation logic.
+    std::unique_ptr<TransformerInterface> _parsedTransform;
+
+    // Specific name of the transformation.
+    std::string _name;
+};
+
 class DocumentSourceOut final : public DocumentSourceNeedsMongod, public SplittableDocumentSource {
 public:
     // virtuals from DocumentSource
@@ -1047,48 +1100,6 @@ private:
     const NamespaceString _outputNs;  // output will go here after all data is processed.
 };
 
-
-class DocumentSourceProject final : public DocumentSource {
-public:
-    boost::optional<Document> getNext() final;
-    const char* getSourceName() const final;
-    Value serialize(bool explain = false) const final;
-    void dispose() final;
-
-    /**
-     * Adds any paths that are included via this projection, or that are referenced by any
-     * expressions.
-     */
-    GetDepsReturn getDependencies(DepsTracker* deps) const final;
-
-    /**
-     * Attempt to move a subsequent $skip or $limit stage before the $project, thus reducing the
-     * number of documents that pass through this stage.
-     */
-    Pipeline::SourceContainer::iterator optimizeAt(Pipeline::SourceContainer::iterator itr,
-                                                   Pipeline::SourceContainer* container) final;
-
-    /**
-     * Optimize any expressions being used in this stage.
-     */
-    boost::intrusive_ptr<DocumentSource> optimize() final;
-
-    void doInjectExpressionContext() final;
-
-    /**
-     * Parse the projection from the user-supplied BSON.
-     */
-    static boost::intrusive_ptr<DocumentSource> createFromBson(
-        BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& pExpCtx);
-
-private:
-    DocumentSourceProject(
-        const boost::intrusive_ptr<ExpressionContext>& expCtx,
-        std::unique_ptr<parsed_aggregation_projection::ParsedAggregationProjection> parsedProject);
-
-    std::unique_ptr<parsed_aggregation_projection::ParsedAggregationProjection> _parsedProject;
-};
-
 class DocumentSourceRedact final : public DocumentSource {
 public:
     boost::optional<Document> getNext() final;
@@ -1952,6 +1963,46 @@ private:
 };
 
 /**
+ * The $project stage can be used for simple transformations such as including or excluding a set
+ * of fields, or can do more sophisticated things, like include some fields and add new "computed"
+ * fields, using the expression language. Note that you cannot mix an exclusion-style projection
+ * with adding or including any other fields.
+ *
+ * This class is not itself a stage (it does not derive from DocumentSource); its private
+ * constructor and static functions exist only to build the stage that implements $project.
+ */
+class DocumentSourceProject final {
+public:
+    // Since $project was once a DocumentSource, other stages that use it expect a pointer instead
+    // of a vector. Use the create function to get a single stage.
+    static boost::intrusive_ptr<DocumentSource> create(
+        BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& pExpCtx);
+
+    // Same as create(), but returns the stage as the only element of a vector.
+    static std::vector<boost::intrusive_ptr<DocumentSource>> createFromBson(
+        BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& pExpCtx);
+
+private:
+    DocumentSourceProject() = default;
+};
+
+/**
+ * $addFields adds or replaces the specified fields to/in the document while preserving the original
+ * document. It is modeled on and throws the same errors as $project.
+ *
+ * Like DocumentSourceProject, this class has a private constructor and only static functions.
+ */
+class DocumentSourceAddFields final {
+public:
+    // Builds an $addFields stage from 'elem' and returns it as the only element of a vector.
+    static std::vector<boost::intrusive_ptr<DocumentSource>> createFromBson(
+        BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& pExpCtx);
+
+private:
+    DocumentSourceAddFields() = default;
+};
+
+/**
  * Provides a document source interface to retrieve collection-level statistics for a given
  * collection.
  */
diff --git a/src/mongo/db/pipeline/document_source_add_fields.cpp b/src/mongo/db/pipeline/document_source_add_fields.cpp
new file mode 100644
index 00000000000..9e893e4cb7b
--- /dev/null
+++ b/src/mongo/db/pipeline/document_source_add_fields.cpp
@@ -0,0 +1,58 @@
+/**
+ * Copyright 2016 (c) 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version.
If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/pipeline/document_source.h"
+
+#include <boost/optional.hpp>
+#include <boost/smart_ptr/intrusive_ptr.hpp>
+
+#include "mongo/db/pipeline/parsed_add_fields.h"
+
+namespace mongo {
+
+using boost::intrusive_ptr;
+using parsed_aggregation_projection::ParsedAddFields;
+
+REGISTER_DOCUMENT_SOURCE_ALIAS(addFields, DocumentSourceAddFields::createFromBson);
+
+std::vector<intrusive_ptr<DocumentSource>> DocumentSourceAddFields::createFromBson(
+    BSONElement elem, const intrusive_ptr<ExpressionContext>& expCtx) {
+    // Confirm that the stage was called with an object.
+    uassert(40269,
+            str::stream() << "$addFields specification stage must be an object, got "
+                          << typeName(elem.type()),
+            elem.type() == Object);
+
+    // Create the AddFields aggregation stage: the generic 1:1 transformation stage, driven by
+    // the parsed $addFields specification and reporting itself as "$addFields".
+    return {new DocumentSourceSingleDocumentTransformation(
+        expCtx, ParsedAddFields::create(elem.Obj()), "$addFields")};
+}
+}  // namespace mongo
diff --git a/src/mongo/db/pipeline/document_source_count.cpp b/src/mongo/db/pipeline/document_source_count.cpp
index 33835721831..6d1eb95d265 100644
--- a/src/mongo/db/pipeline/document_source_count.cpp
+++ b/src/mongo/db/pipeline/document_source_count.cpp
@@ -66,7 +66,7 @@ vector<intrusive_ptr<DocumentSource>> DocumentSourceCount::createFromBson(
     BSONObj projectObj = BSON("$project" << BSON("_id" << 0 << elemString << 1));
 
     auto groupSource = DocumentSourceGroup::createFromBson(groupObj.firstElement(), pExpCtx);
-    auto projectSource = DocumentSourceProject::createFromBson(projectObj.firstElement(), pExpCtx);
+    auto projectSource = DocumentSourceProject::create(projectObj.firstElement(), pExpCtx);
 
     return {groupSource, projectSource};
 }
diff --git a/src/mongo/db/pipeline/document_source_project.cpp b/src/mongo/db/pipeline/document_source_project.cpp
index 5ad0876a7b8..0b42f7cb6f4 100644
---
a/src/mongo/db/pipeline/document_source_project.cpp
+++ b/src/mongo/db/pipeline/document_source_project.cpp
@@ -33,11 +33,7 @@
 #include <boost/optional.hpp>
 #include <boost/smart_ptr/intrusive_ptr.hpp>
 
-#include "mongo/db/pipeline/document.h"
-#include "mongo/db/pipeline/expression.h"
 #include "mongo/db/pipeline/parsed_aggregation_projection.h"
-#include "mongo/db/pipeline/value.h"
-#include "mongo/stdx/memory.h"
 
 namespace mongo {
 
@@ -45,80 +41,23 @@ using boost::intrusive_ptr;
 using parsed_aggregation_projection::ParsedAggregationProjection;
 using parsed_aggregation_projection::ProjectionType;
 
-DocumentSourceProject::DocumentSourceProject(
-    const intrusive_ptr<ExpressionContext>& expCtx,
-    std::unique_ptr<ParsedAggregationProjection> parsedProject)
-    : DocumentSource(expCtx), _parsedProject(std::move(parsedProject)) {}
+REGISTER_DOCUMENT_SOURCE_ALIAS(project, DocumentSourceProject::createFromBson);
 
-REGISTER_DOCUMENT_SOURCE(project, DocumentSourceProject::createFromBson);
-
-const char* DocumentSourceProject::getSourceName() const {
-    return "$project";
-}
-
-boost::optional<Document> DocumentSourceProject::getNext() {
-    pExpCtx->checkForInterrupt();
-
-    auto input = pSource->getNext();
-    if (!input) {
-        return boost::none;
-    }
-
-    return _parsedProject->applyProjection(*input);
-}
-
-intrusive_ptr<DocumentSource> DocumentSourceProject::optimize() {
-    _parsedProject->optimize();
-    return this;
-}
-
-Pipeline::SourceContainer::iterator DocumentSourceProject::optimizeAt(
-    Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) {
-    invariant(*itr == this);
-
-    auto nextSkip = dynamic_cast<DocumentSourceSkip*>((*std::next(itr)).get());
-    auto nextLimit = dynamic_cast<DocumentSourceLimit*>((*std::next(itr)).get());
-
-    if (nextSkip || nextLimit) {
-        // Swap the $limit/$skip before ourselves, thus reducing the number of documents that
-        // pass through the $project.
-        std::swap(*itr, *std::next(itr));
-        return itr == container->begin() ? itr : std::prev(itr);
-    }
-    return std::next(itr);
-}
-
-void DocumentSourceProject::dispose() {
-    _parsedProject.reset();
-}
-
-Value DocumentSourceProject::serialize(bool explain) const {
-    return Value(Document{{getSourceName(), _parsedProject->serialize(explain)}});
-}
-
-intrusive_ptr<DocumentSource> DocumentSourceProject::createFromBson(
-    BSONElement elem, const intrusive_ptr<ExpressionContext>& expCtx) {
+// Builds a single $project stage: the generic 1:1 transformation stage, driven by the parsed
+// projection and reporting itself as "$project". Fails with code 15969 on a non-object spec.
+intrusive_ptr<DocumentSource> DocumentSourceProject::create(
+    BSONElement elem, const intrusive_ptr<ExpressionContext>& expCtx) {
     uassert(15969, "$project specification must be an object", elem.type() == Object);
 
-    return new DocumentSourceProject(expCtx, ParsedAggregationProjection::create(elem.Obj()));
+    return new DocumentSourceSingleDocumentTransformation(
+        expCtx, ParsedAggregationProjection::create(elem.Obj()), "$project");
 }
 
-DocumentSource::GetDepsReturn DocumentSourceProject::getDependencies(DepsTracker* deps) const {
-    // Add any fields referenced by the projection.
-    _parsedProject->addDependencies(deps);
-
-    if (_parsedProject->getType() == ProjectionType::kInclusion) {
-        // Stop looking for further dependencies later in the pipeline, since anything that is not
-        // explicitly included or added in this projection will not exist after this stage, so would
-        // be pointless to include in our dependencies.
-        return EXHAUSTIVE_FIELDS;
-    } else {
-        return SEE_NEXT;
-    }
-}
-
-void DocumentSourceProject::doInjectExpressionContext() {
-    _parsedProject->injectExpressionContext(pExpCtx);
+// Parser registered for the $project alias above; returns the stage built by create() as the
+// only element of a vector.
+std::vector<intrusive_ptr<DocumentSource>> DocumentSourceProject::createFromBson(
+    BSONElement elem, const intrusive_ptr<ExpressionContext>& expCtx) {
+    return {DocumentSourceProject::create(elem, expCtx)};
 }
 
 }  // namespace mongo
diff --git a/src/mongo/db/pipeline/document_source_single_document_transformation.cpp b/src/mongo/db/pipeline/document_source_single_document_transformation.cpp
new file mode 100644
index 00000000000..be4878417de
--- /dev/null
+++ b/src/mongo/db/pipeline/document_source_single_document_transformation.cpp
@@ -0,0 +1,103 @@
+/**
+ * Copyright 2016 (c) 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so.
If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/pipeline/document_source.h"
+
+#include <boost/smart_ptr/intrusive_ptr.hpp>
+
+#include "mongo/db/pipeline/document.h"
+#include "mongo/db/pipeline/expression.h"
+#include "mongo/db/pipeline/value.h"
+
+namespace mongo {
+
+using boost::intrusive_ptr;
+
+DocumentSourceSingleDocumentTransformation::DocumentSourceSingleDocumentTransformation(
+    const intrusive_ptr<ExpressionContext>& pExpCtx,
+    std::unique_ptr<TransformerInterface> parsedTransform,
+    std::string name)
+    : DocumentSource(pExpCtx),
+      _parsedTransform(std::move(parsedTransform)),
+      _name(std::move(name)) {}
+
+const char* DocumentSourceSingleDocumentTransformation::getSourceName() const {
+    return _name.c_str();
+}
+
+boost::optional<Document> DocumentSourceSingleDocumentTransformation::getNext() {
+    pExpCtx->checkForInterrupt();
+    boost::optional<Document> input = pSource->getNext();
+    if (!input) {
+        return boost::none;
+    }
+
+    // Apply the transformation and return the resulting document.
+    return _parsedTransform->applyTransformation(*input);
+}
+
+intrusive_ptr<DocumentSource> DocumentSourceSingleDocumentTransformation::optimize() {
+    _parsedTransform->optimize();
+    return this;
+}
+
+void DocumentSourceSingleDocumentTransformation::dispose() {
+    _parsedTransform.reset();
+}
+
+Value DocumentSourceSingleDocumentTransformation::serialize(bool explain) const {
+    return Value(Document{{getSourceName(), _parsedTransform->serialize(explain)}});
+}
+
+Pipeline::SourceContainer::iterator DocumentSourceSingleDocumentTransformation::optimizeAt(
+    Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) {
+    invariant(*itr == this);
+    auto nextSkip = dynamic_cast<DocumentSourceSkip*>((*std::next(itr)).get());
+    auto nextLimit = dynamic_cast<DocumentSourceLimit*>((*std::next(itr)).get());
+
+    if (nextSkip || nextLimit) {
+        // Move the $skip or $limit ahead of this stage so fewer documents are transformed.
+        std::swap(*itr, *std::next(itr));
+        return itr == container->begin() ? itr : std::prev(itr);
+    }
+    return std::next(itr);
+}
+
+DocumentSource::GetDepsReturn DocumentSourceSingleDocumentTransformation::getDependencies(
+    DepsTracker* deps) const {
+    // Each parsed transformation adds its own dependencies and chooses the return type.
+    return _parsedTransform->addDependencies(deps);
+}
+
+void DocumentSourceSingleDocumentTransformation::doInjectExpressionContext() {
+    _parsedTransform->injectExpressionContext(pExpCtx);
+}
+
+}  // namespace mongo
diff --git a/src/mongo/db/pipeline/document_source_test.cpp b/src/mongo/db/pipeline/document_source_test.cpp
index 9ba8bc1fecb..d52eb018e7b 100644
--- a/src/mongo/db/pipeline/document_source_test.cpp
+++ b/src/mongo/db/pipeline/document_source_test.cpp
@@ -1378,7 +1378,7 @@ protected:
     void createProject(const BSONObj& projection) {
         BSONObj spec = BSON("$project" << projection);
         BSONElement specElement = spec.firstElement();
-        _project = DocumentSourceProject::createFromBson(specElement, ctx());
+        _project = DocumentSourceProject::create(specElement, ctx());
     }
 
     DocumentSource* project() {
@@ -3838,7 +3838,9 @@ public:
         const auto* groupStage = dynamic_cast<DocumentSourceGroup*>(result[0].get());
         ASSERT(groupStage);
 
-        const auto* projectStage = dynamic_cast<DocumentSourceProject*>(result[1].get());
+        // Project stages are actually implemented as SingleDocumentTransformations.
+        const auto* projectStage =
+            dynamic_cast<DocumentSourceSingleDocumentTransformation*>(result[1].get());
         ASSERT(projectStage);
 
         const bool explain = true;
@@ -4738,6 +4740,135 @@ TEST_F(BucketAutoTests, ShouldFailOnNegativeNumbersWhenGranularitySpecified) {
 }
 }  // namespace DocumentSourceBucketAuto
 
+namespace DocumentSourceAddFields {
+
+using mongo::DocumentSourceMock;
+using mongo::DocumentSourceAddFields;
+
+//
+// DocumentSourceAddFields delegates much of its responsibilities to the ParsedAddFields, which
+// derives from ParsedAggregationProjection.
+// Most of the functional tests are testing ParsedAddFields directly. These are meant as
+// simpler integration tests.
+//
+
+/**
+ * Class which provides useful helpers to test the functionality of the $addFields stage.
+ */
+class AddFieldsTest : public Mock::Base, public unittest::Test {
+
+public:
+    AddFieldsTest() : _mock(DocumentSourceMock::create()) {}
+
+protected:
+    /**
+     * Creates the $addFields stage, which can be accessed via addFields().
+     */
+    void createAddFields(const BSONObj& fieldsToAdd) {
+        BSONObj spec = BSON("$addFields" << fieldsToAdd);
+        BSONElement specElement = spec.firstElement();
+        _addFields = DocumentSourceAddFields::createFromBson(specElement, ctx())[0];
+        addFields()->setSource(_mock.get());
+    }
+
+    DocumentSource* addFields() {
+        return _addFields.get();
+    }
+
+    DocumentSourceMock* source() {
+        return _mock.get();
+    }
+
+    /**
+     * Assert that iterator state accessors consistently report the source is exhausted.
+     */
+    void assertExhausted() const {
+        ASSERT(!_addFields->getNext());
+        ASSERT(!_addFields->getNext());
+        ASSERT(!_addFields->getNext());
+    }
+
+private:
+    intrusive_ptr<DocumentSource> _addFields;
+    intrusive_ptr<DocumentSourceMock> _mock;
+};
+
+// Verify that the addFields stage keeps existing fields in order when replacing fields, and adds
+// new fields at the end of the document.
+TEST_F(AddFieldsTest, KeepsUnspecifiedFieldsReplacesFieldsAndAddsNewFields) {
+    createAddFields(BSON("e" << 2 << "b" << BSON("c" << 3)));
+    source()->queue.push_back(Document{{"a", 1}, {"b", Document{{"c", 1}}}, {"d", 1}});
+    boost::optional<Document> next = addFields()->getNext();
+    ASSERT_TRUE(bool(next));
+    Document expected = Document{{"a", 1}, {"b", Document{{"c", 3}}}, {"d", 1}, {"e", 2}};
+    ASSERT_DOCUMENT_EQ(*next, expected);
+}
+
+// Verify that the addFields stage optimizes expressions passed as input to added fields.
+TEST_F(AddFieldsTest, OptimizesInnerExpressions) {
+    createAddFields(BSON("a" << BSON("$and" << BSON_ARRAY(BSON("$const" << true)))));
+    addFields()->optimize();
+    // The $and should have been replaced with its only argument.
+    vector<Value> serializedArray;
+    addFields()->serializeToArray(serializedArray);
+    ASSERT_EQUALS(serializedArray[0].getDocument().toBson(),
+                  fromjson("{$addFields: {a: {$const: true}}}"));
+}
+
+// Verify that the addFields stage requires a valid object specification.
+TEST_F(AddFieldsTest, ShouldErrorOnNonObjectSpec) {
+    // Can't use createAddFields() helper because we want to give a non-object spec.
+    BSONObj spec = BSON("$addFields"
+                        << "foo");
+    BSONElement specElement = spec.firstElement();
+    ASSERT_THROWS_CODE(
+        DocumentSourceAddFields::createFromBson(specElement, ctx()), UserException, 40269);
+}
+
+// Verify that multiple documents can be processed in a row with the addFields stage.
+TEST_F(AddFieldsTest, ProcessesMultipleDocuments) {
+    createAddFields(BSON("a" << 10));
+    source()->queue.push_back(Document{{"a", 1}, {"b", 2}});
+    source()->queue.push_back(Document{{"c", 3}, {"d", 4}});
+
+    boost::optional<Document> next = addFields()->getNext();
+    ASSERT(bool(next));
+    Document expected = Document{{"a", 10}, {"b", 2}};
+    ASSERT_DOCUMENT_EQ(*next, expected);
+
+    next = addFields()->getNext();
+    ASSERT(bool(next));
+    expected = Document{{"c", 3}, {"d", 4}, {"a", 10}};
+    ASSERT_DOCUMENT_EQ(*next, expected);
+
+    assertExhausted();
+}
+
+// Verify that the addFields stage correctly reports its dependencies.
+TEST_F(AddFieldsTest, AddsDependenciesOfIncludedAndComputedFields) {
+    createAddFields(
+        fromjson("{a: true, x: '$b', y: {$and: ['$c','$d']}, z: {$meta: 'textScore'}}"));
+    DepsTracker dependencies(DepsTracker::MetadataAvailable::kTextScore);
+    ASSERT_EQUALS(DocumentSource::SEE_NEXT, addFields()->getDependencies(&dependencies));
+    ASSERT_EQUALS(3U, dependencies.fields.size());
+
+    // No implicit _id dependency.
+    ASSERT_EQUALS(0U, dependencies.fields.count("_id"));
+
+    // Replaced field is not dependent.
+    ASSERT_EQUALS(0U, dependencies.fields.count("a"));
+
+    // Field path expression dependency.
+    ASSERT_EQUALS(1U, dependencies.fields.count("b"));
+
+    // Nested expression dependencies.
+    ASSERT_EQUALS(1U, dependencies.fields.count("c"));
+    ASSERT_EQUALS(1U, dependencies.fields.count("d"));
+    ASSERT_EQUALS(false, dependencies.needWholeDocument);
+    ASSERT_EQUALS(true, dependencies.getNeedTextScore());
+}
+}  // namespace DocumentSourceAddFields
+
 class All : public Suite {
 public:
     All() : Suite("documentsource") {}
diff --git a/src/mongo/db/pipeline/parsed_add_fields.cpp b/src/mongo/db/pipeline/parsed_add_fields.cpp
new file mode 100644
index 00000000000..3f9b5427a32
--- /dev/null
+++ b/src/mongo/db/pipeline/parsed_add_fields.cpp
@@ -0,0 +1,142 @@
+/**
+ * Copyright (C) 2016 MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version.
If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/pipeline/parsed_add_fields.h"
+
+#include <algorithm>
+
+#include "mongo/db/pipeline/parsed_aggregation_projection.h"
+
+namespace mongo {
+
+namespace parsed_aggregation_projection {
+
+// Validates 'spec' and parses it into a new ParsedAddFields; uasserts if validation fails.
+std::unique_ptr<ParsedAddFields> ParsedAddFields::create(const BSONObj& spec) {
+    // Verify that we don't have conflicting field paths, etc.
+    Status status = ProjectionSpecValidator::validate(spec);
+    if (!status.isOK()) {
+        uasserted(status.location(),
+                  str::stream() << "Invalid $addFields specification: " << status.reason());
+    }
+    std::unique_ptr<ParsedAddFields> parsedAddFields = stdx::make_unique<ParsedAddFields>();
+
+    // Actually parse the specification.
+    parsedAddFields->parse(spec);
+    return parsedAddFields;
+}
+
+// Parses the top level of 'spec', registering every field to add in the tree rooted at _root.
+void ParsedAddFields::parse(const BSONObj& spec, const VariablesParseState& variablesParseState) {
+    for (auto&& elem : spec) {
+        auto fieldName = elem.fieldNameStringData();
+
+        if (elem.type() == BSONType::Object) {
+            // This is either an expression, or a nested specification.
+            if (!parseObjectAsExpression(fieldName, elem.Obj(), variablesParseState)) {
+                // The field name might be a dotted path. If so, we need to keep adding children
+                // to our tree until we create a child that represents that path.
+                auto remainingPath = FieldPath(elem.fieldName());
+                auto child = _root.get();
+                while (remainingPath.getPathLength() > 1) {
+                    child = child->addOrGetChild(remainingPath.getFieldName(0));
+                    remainingPath = remainingPath.tail();
+                }
+                // It is illegal to construct an empty FieldPath, so the above loop ends one
+                // iteration too soon. Add the last path here.
+                child = child->addOrGetChild(remainingPath.fullPath());
+                parseSubObject(elem.Obj(), variablesParseState, child);
+            }
+        } else {
+            // This is a literal or regular value.
+            _root->addComputedField(FieldPath(elem.fieldName()),
+                                    Expression::parseOperand(elem, variablesParseState));
+        }
+    }
+}
+
+Document ParsedAddFields::applyProjection(Document inputDoc, Variables* vars) const {
+    // All expressions will be evaluated in the context of the input document, before any
+    // transformations have been applied.
+    vars->setRoot(inputDoc);
+
+    // The output doc is the same as the input doc, with the added fields.
+    MutableDocument output(inputDoc);
+    _root->addComputedFields(&output, vars);
+
+    // Pass through the metadata.
+    output.copyMetaDataFrom(inputDoc);
+    return output.freeze();
+}
+
+bool ParsedAddFields::parseObjectAsExpression(StringData pathToObject,
+                                              const BSONObj& objSpec,
+                                              const VariablesParseState& variablesParseState) {
+    if (objSpec.firstElementFieldName()[0] == '$') {
+        // This is an expression like {$add: [...]}. We have already verified that it has only one
+        // field.
+        invariant(objSpec.nFields() == 1);
+        _root->addComputedField(pathToObject.toString(),
+                                Expression::parseExpression(objSpec, variablesParseState));
+        return true;
+    }
+    return false;
+}
+
+void ParsedAddFields::parseSubObject(const BSONObj& subObj,
+                                     const VariablesParseState& variablesParseState,
+                                     InclusionNode* node) {
+    for (auto&& elem : subObj) {
+        invariant(elem.fieldName()[0] != '$');
+        // Dotted paths in a sub-object have already been detected and disallowed by the function
+        // ProjectionSpecValidator::validate().
+        invariant(elem.fieldNameStringData().find('.') == std::string::npos);
+
+        if (elem.type() == BSONType::Object) {
+            // This is either an expression, or a nested specification.
+            auto fieldName = elem.fieldNameStringData().toString();
+            if (!parseObjectAsExpression(
+                    FieldPath::getFullyQualifiedPath(node->getPath(), fieldName),
+                    elem.Obj(),
+                    variablesParseState)) {
+                // It was a nested sub-object.
+                auto child = node->addOrGetChild(fieldName);
+                parseSubObject(elem.Obj(), variablesParseState, child);
+            }
+        } else {
+            // This is a literal or regular value.
+            node->addComputedField(FieldPath(elem.fieldName()),
+                                   Expression::parseOperand(elem, variablesParseState));
+        }
+    }
+}
+
+}  // namespace parsed_aggregation_projection
+}  // namespace mongo
diff --git a/src/mongo/db/pipeline/parsed_add_fields.h b/src/mongo/db/pipeline/parsed_add_fields.h
new file mode 100644
index 00000000000..240b17faead
--- /dev/null
+++ b/src/mongo/db/pipeline/parsed_add_fields.h
@@ -0,0 +1,152 @@
+/**
+ * Copyright (C) 2016 MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein.
If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include <memory> + +#include "mongo/db/pipeline/expression.h" +#include "mongo/db/pipeline/expression_context.h" +#include "mongo/db/pipeline/parsed_aggregation_projection.h" +#include "mongo/db/pipeline/parsed_inclusion_projection.h" +#include "mongo/stdx/memory.h" + +namespace mongo { + +namespace parsed_aggregation_projection { + +/** + * A ParsedAddFields represents a parsed form of the raw BSON specification for the AddFields + * stage. + * + * This class is mostly a wrapper around an InclusionNode tree. It contains logic to parse a + * specification object into the corresponding InclusionNode tree, but defers most execution logic + * to the underlying tree. In this way it is similar to ParsedInclusionProjection, but it differs + * by not applying inclusions before adding computed fields, thus keeping all existing fields. + */ +class ParsedAddFields : public ParsedAggregationProjection { +public: + ParsedAddFields() : ParsedAggregationProjection(), _root(new InclusionNode()) {} + + /** + * Creates the data needed to perform an AddFields. + * Verifies that there are no conflicting paths in the specification. + * Overrides the ParsedAggregationProjection's create method. + */ + static std::unique_ptr<ParsedAddFields> create(const BSONObj& spec); + + ProjectionType getType() const final { + return ProjectionType::kComputed; + } + + /** + * Parses the addFields specification given by 'spec', populating internal data structures. 
+ */ + void parse(const BSONObj& spec) final { + VariablesIdGenerator idGenerator; + VariablesParseState variablesParseState(&idGenerator); + parse(spec, variablesParseState); + _variables = stdx::make_unique<Variables>(idGenerator.getIdCount()); + } + + Document serialize(bool explain = false) const final { + MutableDocument output; + _root->serialize(&output, explain); + return output.freeze(); + } + + /** + * Optimizes any computed expressions. + */ + void optimize() final { + _root->optimize(); + } + + void injectExpressionContext(const boost::intrusive_ptr<ExpressionContext>& expCtx) final { + _root->injectExpressionContext(expCtx); + } + + DocumentSource::GetDepsReturn addDependencies(DepsTracker* deps) const final { + _root->addDependencies(deps); + return DocumentSource::SEE_NEXT; + } + + /** + * Add the specified fields to 'inputDoc'. + * + * Replaced fields will remain in their original place in the document, while new added fields + * will be added to the end of the document in the order in which they were specified to the + * $addFields stage. + * + * Arrays will be traversed, with any dotted/nested computed fields applied to each element in + * the array. For example, setting "a.0": "hello" will add a field "0" to every object + * in the array "a". If there is an element in "a" that is not an object, it will be replaced + * with {"0": "hello"}. See SERVER-25200 for more details. + */ + Document applyProjection(Document inputDoc) const final { + _variables->setRoot(inputDoc); + return applyProjection(inputDoc, _variables.get()); + } + + Document applyProjection(Document inputDoc, Variables* vars) const; + +private: + /** + * Parses 'spec' to determine which fields to add. + */ + void parse(const BSONObj& spec, const VariablesParseState& variablesParseState); + + /** + * Attempts to parse 'objSpec' as an expression like {$add: [...]}. Adds a computed field to + * '_root' and returns true if it was successfully parsed as an expression. 
Returns false if it + * was not an expression specification. + * + * Throws an error if it was determined to be an expression specification, but failed to parse + * as a valid expression. + */ + bool parseObjectAsExpression(StringData pathToObject, + const BSONObj& objSpec, + const VariablesParseState& variablesParseState); + + /** + * Traverses 'subObj' and parses each field. Adds any computed fields at this level + * to 'node'. + */ + void parseSubObject(const BSONObj& subObj, + const VariablesParseState& variablesParseState, + InclusionNode* node); + + // The InclusionNode tree does most of the execution work once constructed. + std::unique_ptr<InclusionNode> _root; + + // This is needed to give the expressions knowledge about the context in which they are being + // executed. + std::unique_ptr<Variables> _variables; +}; +} // namespace parsed_aggregation_projection +} // namespace mongo diff --git a/src/mongo/db/pipeline/parsed_add_fields_test.cpp b/src/mongo/db/pipeline/parsed_add_fields_test.cpp new file mode 100644 index 00000000000..1bdb8409461 --- /dev/null +++ b/src/mongo/db/pipeline/parsed_add_fields_test.cpp @@ -0,0 +1,497 @@ +/** + * Copyright (C) 2016 MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/pipeline/parsed_add_fields.h" + +#include <vector> + +#include "mongo/bson/bsonmisc.h" +#include "mongo/bson/bsonobjbuilder.h" +#include "mongo/bson/json.h" +#include "mongo/db/pipeline/dependencies.h" +#include "mongo/db/pipeline/document.h" +#include "mongo/db/pipeline/document_value_test_util.h" +#include "mongo/db/pipeline/value.h" +#include "mongo/unittest/unittest.h" + +namespace mongo { +namespace parsed_aggregation_projection { +namespace { +using std::vector; + +// These ParsedAddFields spec tests are a subset of the ParsedAggregationProjection creation tests. +// ParsedAddFields should behave the same way, but does not use the same creation, so we include +// an abbreviation of the same tests here. + +// Verify that ParsedAddFields rejects specifications with conflicting field paths. +TEST(ParsedAddFieldsSpec, ThrowsOnCreationWithConflictingFieldPaths) { + // These specs contain the same exact path.
+ ASSERT_THROWS(ParsedAddFields::create(BSON("a" << 1 << "a" << 2)), UserException); + ASSERT_THROWS(ParsedAddFields::create(BSON("a" << BSON("b" << 1 << "b" << 2))), UserException); + ASSERT_THROWS(ParsedAddFields::create(BSON("_id" << 3 << "_id" << true)), UserException); + + // These specs contain overlapping paths. + ASSERT_THROWS(ParsedAddFields::create(BSON("a" << 1 << "a.b" << 2)), UserException); + ASSERT_THROWS(ParsedAddFields::create(BSON("a.b.c" << 1 << "a" << 2)), UserException); + ASSERT_THROWS(ParsedAddFields::create(BSON("_id" << true << "_id.x" << true)), UserException); +} + +// Verify that ParsedAddFields rejects specifications that contain invalid field paths. +TEST(ParsedAddFieldsSpec, ThrowsOnCreationWithInvalidFieldPath) { + // Dotted subfields are not allowed. + ASSERT_THROWS(ParsedAddFields::create(BSON("a" << BSON("b.c" << true))), UserException); + + // The user cannot start a field with $. + ASSERT_THROWS(ParsedAddFields::create(BSON("$dollar" << 0)), UserException); + ASSERT_THROWS(ParsedAddFields::create(BSON("c.$d" << true)), UserException); + + // Empty field names should throw an error. + ASSERT_THROWS(ParsedAddFields::create(BSON("" << 2)), UserException); + ASSERT_THROWS(ParsedAddFields::create(BSON("a" << BSON("" << true))), UserException); + ASSERT_THROWS(ParsedAddFields::create(BSON("" << BSON("a" << true))), UserException); + ASSERT_THROWS(ParsedAddFields::create(BSON("a." << true)), UserException); + ASSERT_THROWS(ParsedAddFields::create(BSON(".a" << true)), UserException); +} + +// Verify that ParsedAddFields rejects specifications that contain empty objects or invalid +// expressions. +TEST(ParsedAddFieldsSpec, ThrowsOnCreationWithInvalidObjectsOrExpressions) { + // Invalid expressions should be rejected. 
+ ASSERT_THROWS( + ParsedAddFields::create(BSON("a" << BSON("$add" << BSON_ARRAY(4 << 2) << "b" << 1))), + UserException); + ASSERT_THROWS(ParsedAddFields::create(BSON("a" << BSON("$gt" << BSON("bad" + << "arguments")))), + UserException); + ASSERT_THROWS(ParsedAddFields::create( + BSON("a" << false << "b" << BSON("$unknown" << BSON_ARRAY(4 << 2)))), + UserException); + + // Empty specifications are not allowed. + ASSERT_THROWS(ParsedAddFields::create(BSONObj()), UserException); + + // Empty nested objects are not allowed. + ASSERT_THROWS(ParsedAddFields::create(BSON("a" << BSONObj())), UserException); +} + +TEST(ParsedAddFields, DoesNotErrorOnTwoNestedFields) { + ParsedAddFields::create(BSON("a.b" << true << "a.c" << true)); + ParsedAddFields::create(BSON("a.b" << true << "a" << BSON("c" << true))); +} + +// Verify that replaced fields are not included as dependencies. +TEST(ParsedAddFieldsDeps, RemovesReplaceFieldsFromDependencies) { + ParsedAddFields addition; + addition.parse(BSON("a" << true)); + + DepsTracker deps; + addition.addDependencies(&deps); + + ASSERT_EQ(deps.fields.size(), 0UL); + ASSERT_EQ(deps.fields.count("_id"), 0UL); // Not explicitly included. + ASSERT_EQ(deps.fields.count("a"), 0UL); // Set to true. +} + +// Verify that adding nested fields keeps the top-level field as a dependency. +TEST(ParsedAddFieldsDeps, IncludesTopLevelFieldInDependenciesWhenAddingNestedFields) { + ParsedAddFields addition; + addition.parse(BSON("x.y" << true)); + + DepsTracker deps; + addition.addDependencies(&deps); + + ASSERT_EQ(deps.fields.size(), 1UL); + ASSERT_EQ(deps.fields.count("_id"), 0UL); // Not explicitly included. + ASSERT_EQ(deps.fields.count("x.y"), 0UL); // Set to true. + ASSERT_EQ(deps.fields.count("x"), 1UL); // Top-level of nested field included. +} + +// Verify that fields that an expression depends on are added to the dependencies. 
+TEST(ParsedAddFieldsDeps, AddsDependenciesForComputedFields) { + ParsedAddFields addition; + addition.parse(BSON("x.y" + << "$z" + << "a" + << "$b")); + + DepsTracker deps; + addition.addDependencies(&deps); + + ASSERT_EQ(deps.fields.size(), 3UL); + ASSERT_EQ(deps.fields.count("_id"), 0UL); // Not explicitly included. + ASSERT_EQ(deps.fields.count("z"), 1UL); // Needed by the ExpressionFieldPath for x.y. + ASSERT_EQ(deps.fields.count("x"), 1UL); // Preserves top-level field, for structure. + ASSERT_EQ(deps.fields.count("a"), 0UL); // Replaced, so omitted. + ASSERT_EQ(deps.fields.count("b"), 1UL); // Needed by the ExpressionFieldPath for a. +} + +// Verify that the serialization produces the correct output: converting numbers and literals to +// their corresponding $const form. +TEST(ParsedAddFieldsSerialize, SerializesToCorrectForm) { + ParsedAddFields addition; + addition.parse(fromjson("{a: {$add: ['$a', 2]}, b: {d: 3}, 'x.y': {$literal: 4}}")); + + auto expectedSerialization = Document( + fromjson("{a: {$add: [\"$a\", {$const: 2}]}, b: {d: {$const: 3}}, x: {y: {$const: 4}}}")); + + // Should be the same if we're serializing for explain or for internal use. + ASSERT_DOCUMENT_EQ(expectedSerialization, addition.serialize(false)); + ASSERT_DOCUMENT_EQ(expectedSerialization, addition.serialize(true)); +} + +// Verify that serialize treats the _id field as any other field: included when explicitly listed. +TEST(ParsedAddFieldsSerialize, AddsIdToSerializeWhenExplicitlyIncluded) { + ParsedAddFields addition; + addition.parse(BSON("_id" << false)); + + // Adds explicit "_id" setting field, serializes expressions. + auto expectedSerialization = Document(fromjson("{_id: {$const: false}}")); + + // Should be the same if we're serializing for explain or for internal use.
+ ASSERT_DOCUMENT_EQ(expectedSerialization, addition.serialize(false)); + ASSERT_DOCUMENT_EQ(expectedSerialization, addition.serialize(true)); +} + +// Verify that serialize treats the _id field as any other field: excluded when not explicitly +// listed in the specification. We add this check because it is different behavior from $project, +// yet they derive from the same parent class. If the parent class were to change, this test would +// fail. +TEST(ParsedAddFieldsSerialize, OmitsIdFromSerializeWhenNotIncluded) { + ParsedAddFields addition; + addition.parse(BSON("a" << true)); + + // Does not implicitly include "_id" field. + auto expectedSerialization = Document(fromjson("{a: {$const: true}}")); + + // Should be the same if we're serializing for explain or for internal use. + ASSERT_DOCUMENT_EQ(expectedSerialization, addition.serialize(false)); + ASSERT_DOCUMENT_EQ(expectedSerialization, addition.serialize(true)); +} + +// Verify that the $addFields stage optimizes expressions into simpler forms when possible. +TEST(ParsedAddFieldsOptimize, OptimizesTopLevelExpressions) { + ParsedAddFields addition; + addition.parse(BSON("a" << BSON("$add" << BSON_ARRAY(1 << 2)))); + addition.optimize(); + auto expectedSerialization = Document{{"a", Document{{"$const", 3}}}}; + + // Should be the same if we're serializing for explain or for internal use. + ASSERT_DOCUMENT_EQ(expectedSerialization, addition.serialize(false)); + ASSERT_DOCUMENT_EQ(expectedSerialization, addition.serialize(true)); +} + +// Verify that the $addFields stage optimizes expressions even when they are nested. +TEST(ParsedAddFieldsOptimize, ShouldOptimizeNestedExpressions) { + ParsedAddFields addition; + addition.parse(BSON("a.b" << BSON("$add" << BSON_ARRAY(1 << 2)))); + addition.optimize(); + auto expectedSerialization = Document{{"a", Document{{"b", Document{{"$const", 3}}}}}}; + + // Should be the same if we're serializing for explain or for internal use. 
+ ASSERT_DOCUMENT_EQ(expectedSerialization, addition.serialize(false)); + ASSERT_DOCUMENT_EQ(expectedSerialization, addition.serialize(true)); +} + +// +// Top-level only. +// + +// Verify that a new field is added to the end of the document. +TEST(ParsedAddFieldsExecutionTest, AddsNewFieldToEndOfDocument) { + ParsedAddFields addition; + addition.parse(BSON("c" << 3)); + + // There are no fields in the document. + auto result = addition.applyProjection(Document{}); + auto expectedResult = Document{{"c", 3}}; + ASSERT_DOCUMENT_EQ(result, expectedResult); + + // There are fields in the document but none of them are the added field. + result = addition.applyProjection(Document{{"a", 1}, {"b", 2}}); + expectedResult = Document{{"a", 1}, {"b", 2}, {"c", 3}}; + ASSERT_DOCUMENT_EQ(result, expectedResult); +} + +// Verify that an existing field is replaced and stays in the same order in the document. +TEST(ParsedAddFieldsExecutionTest, ReplacesFieldThatAlreadyExistsInDocument) { + ParsedAddFields addition; + addition.parse(BSON("c" << 3)); + + // Specified field is the only field in the document, and is replaced. + auto result = addition.applyProjection(Document{{"c", 1}}); + auto expectedResult = Document{{"c", 3}}; + ASSERT_DOCUMENT_EQ(result, expectedResult); + + // Specified field is one of the fields in the document, and is replaced in its existing order. + result = addition.applyProjection(Document{{"c", 1}, {"b", 2}}); + expectedResult = Document{{"c", 3}, {"b", 2}}; + ASSERT_DOCUMENT_EQ(result, expectedResult); +} + +// Verify that replacing multiple fields preserves the original field order in the document. 
+TEST(ParsedAddFieldsExecutionTest, ReplacesMultipleFieldsWhilePreservingInputFieldOrder) { + ParsedAddFields addition; + addition.parse(BSON("second" + << "SECOND" + << "first" + << "FIRST")); + auto result = addition.applyProjection(Document{{"first", 0}, {"second", 1}, {"third", 2}}); + auto expectedResult = Document{{"first", "FIRST"}, {"second", "SECOND"}, {"third", 2}}; + ASSERT_DOCUMENT_EQ(result, expectedResult); +} + +// Verify that adding multiple fields adds the fields in the order specified. +TEST(ParsedAddFieldsExecutionTest, AddsNewFieldsAfterExistingFieldsInOrderSpecified) { + ParsedAddFields addition; + addition.parse(BSON("firstComputed" + << "FIRST" + << "secondComputed" + << "SECOND")); + auto result = addition.applyProjection(Document{{"first", 0}, {"second", 1}, {"third", 2}}); + auto expectedResult = Document{{"first", 0}, + {"second", 1}, + {"third", 2}, + {"firstComputed", "FIRST"}, + {"secondComputed", "SECOND"}}; + ASSERT_DOCUMENT_EQ(result, expectedResult); +} + +// Verify that both adding and replacing fields at the same time follows the same rules as doing +// each independently. +TEST(ParsedAddFieldsExecutionTest, ReplacesAndAddsNewFieldsWithSameOrderingRulesAsSeparately) { + ParsedAddFields addition; + addition.parse(BSON("firstComputed" + << "FIRST" + << "second" + << "SECOND")); + auto result = addition.applyProjection(Document{{"first", 0}, {"second", 1}, {"third", 2}}); + auto expectedResult = + Document{{"first", 0}, {"second", "SECOND"}, {"third", 2}, {"firstComputed", "FIRST"}}; + ASSERT_DOCUMENT_EQ(result, expectedResult); +} + +// Verify that _id is included just like a regular field, in whatever order it appears in the +// input document, when adding new fields. 
+TEST(ParsedAddFieldsExecutionTest, IdFieldIsKeptInOrderItAppearsInInputDocument) { + ParsedAddFields addition; + addition.parse(BSON("newField" + << "computedVal")); + auto result = addition.applyProjection(Document{{"_id", "ID"}, {"a", 1}}); + auto expectedResult = Document{{"_id", "ID"}, {"a", 1}, {"newField", "computedVal"}}; + ASSERT_DOCUMENT_EQ(result, expectedResult); + + result = addition.applyProjection(Document{{"a", 1}, {"_id", "ID"}}); + expectedResult = Document{{"a", 1}, {"_id", "ID"}, {"newField", "computedVal"}}; + ASSERT_DOCUMENT_EQ(result, expectedResult); +} + +// Verify that replacing or adding _id works just like any other field. +TEST(ParsedAddFieldsExecutionTest, ShouldReplaceIdWithComputedId) { + ParsedAddFields addition; + addition.parse(BSON("_id" + << "newId")); + auto result = addition.applyProjection(Document{{"_id", "ID"}, {"a", 1}}); + auto expectedResult = Document{{"_id", "newId"}, {"a", 1}}; + ASSERT_DOCUMENT_EQ(result, expectedResult); + + result = addition.applyProjection(Document{{"a", 1}, {"_id", "ID"}}); + expectedResult = Document{{"a", 1}, {"_id", "newId"}}; + ASSERT_DOCUMENT_EQ(result, expectedResult); + + result = addition.applyProjection(Document{{"a", 1}}); + expectedResult = Document{{"a", 1}, {"_id", "newId"}}; + ASSERT_DOCUMENT_EQ(result, expectedResult); +} + +// +// Adding nested fields. +// + +// Verify that adding a dotted field keeps the other fields in the subdocument. +TEST(ParsedAddFieldsExecutionTest, KeepsExistingSubFieldsWhenAddingSimpleDottedFieldToSubDoc) { + ParsedAddFields addition; + addition.parse(BSON("a.b" << true)); + + // More than one field in sub document. + auto result = addition.applyProjection(Document{{"a", Document{{"b", 1}, {"c", 2}}}}); + auto expectedResult = Document{{"a", Document{{"b", true}, {"c", 2}}}}; + ASSERT_DOCUMENT_EQ(result, expectedResult); + + // Specified field is the only field in the sub document. 
+ result = addition.applyProjection(Document{{"a", Document{{"b", 1}}}}); + expectedResult = Document{{"a", Document{{"b", true}}}}; + ASSERT_DOCUMENT_EQ(result, expectedResult); + + // Specified field is not present in the sub document. + result = addition.applyProjection(Document{{"a", Document{{"c", 1}}}}); + expectedResult = Document{{"a", Document{{"c", 1}, {"b", true}}}}; + ASSERT_DOCUMENT_EQ(result, expectedResult); + + // There are no fields in sub document. + result = addition.applyProjection(Document{{"a", Document{}}}); + expectedResult = Document{{"a", Document{{"b", true}}}}; + ASSERT_DOCUMENT_EQ(result, expectedResult); +} + +// Verify that creating a dotted field creates the subdocument structure necessary. +TEST(ParsedAddFieldsExecutionTest, CreatesSubDocIfDottedAddedFieldDoesNotExist) { + ParsedAddFields addition; + addition.parse(BSON("sub.target" << true)); + + // Should add the path if it doesn't exist. + auto result = addition.applyProjection(Document{}); + auto expectedResult = Document{{"sub", Document{{"target", true}}}}; + ASSERT_DOCUMENT_EQ(result, expectedResult); + + // Should replace the second part of the path if that part already exists. + result = addition.applyProjection(Document{{"sub", "notADocument"}}); + expectedResult = Document{{"sub", Document{{"target", true}}}}; + ASSERT_DOCUMENT_EQ(result, expectedResult); +} + +// Verify that adding a dotted value to an array field sets the field in every element of the array. +// SERVER-25200: make this agree with $set. +TEST(ParsedAddFieldsExecutionTest, AppliesDottedAdditionToEachElementInArray) { + ParsedAddFields addition; + addition.parse(BSON("a.b" << true)); + + vector<Value> nestedValues = {Value(1), + Value(Document{}), + Value(Document{{"b", 1}}), + Value(Document{{"b", 1}, {"c", 2}}), + Value(vector<Value>{}), + Value(vector<Value>{Value(1), Value(Document{{"c", 1}})})}; + + // Adds the field "b" to every object in the array. Recurses on non-empty nested arrays. 
+ vector<Value> expectedNestedValues = { + Value(Document{{"b", true}}), + Value(Document{{"b", true}}), + Value(Document{{"b", true}}), + Value(Document{{"b", true}, {"c", 2}}), + Value(vector<Value>{}), + Value(vector<Value>{Value(Document{{"b", true}}), Value(Document{{"c", 1}, {"b", true}})})}; + auto result = addition.applyProjection(Document{{"a", nestedValues}}); + auto expectedResult = Document{{"a", expectedNestedValues}}; + ASSERT_DOCUMENT_EQ(result, expectedResult); +} + +// Verify that creation of the subdocument structure works for many layers of nesting. +TEST(ParsedAddFieldsExecutionTest, CreatesNestedSubDocumentsAllTheWayToAddedField) { + ParsedAddFields addition; + addition.parse(BSON("a.b.c.d" + << "computedVal")); + + // Should add the path if it doesn't exist. + auto result = addition.applyProjection(Document{}); + auto expectedResult = + Document{{"a", Document{{"b", Document{{"c", Document{{"d", "computedVal"}}}}}}}}; + ASSERT_DOCUMENT_EQ(result, expectedResult); + + // Should replace non-documents with documents. + result = addition.applyProjection(Document{{"a", Document{{"b", "other"}}}}); + ASSERT_DOCUMENT_EQ(result, expectedResult); +} + +// Verify that _id is not special: we can add subfields to it as well. +TEST(ParsedAddFieldsExecutionTest, AddsSubFieldsOfId) { + ParsedAddFields addition; + addition.parse(BSON("_id.X" << true << "_id.Z" + << "NEW")); + auto result = addition.applyProjection(Document{{"_id", Document{{"X", 1}, {"Y", 2}}}}); + auto expectedResult = Document{{"_id", Document{{"X", true}, {"Y", 2}, {"Z", "NEW"}}}}; + ASSERT_DOCUMENT_EQ(result, expectedResult); +} + +// Verify that both ways of specifying nested fields -- both dotted notation and nesting -- +// can be used together in the same specification. +TEST(ParsedAddFieldsExecutionTest, ShouldAllowMixedNestedAndDottedFields) { + ParsedAddFields addition; + // Include all of "a.b", "a.c", "a.d", and "a.e". + // Add new computed fields "a.W", "a.X", "a.Y", and "a.Z". 
+ addition.parse(BSON("a.b" << true << "a.c" << true << "a.W" + << "W" + << "a.X" + << "X" + << "a" + << BSON("d" << true << "e" << true << "Y" + << "Y" + << "Z" + << "Z"))); + auto result = addition.applyProjection( + Document{{"a", Document{{"b", "b"}, {"c", "c"}, {"d", "d"}, {"e", "e"}, {"f", "f"}}}}); + auto expectedResult = Document{{"a", + Document{{"b", true}, + {"c", true}, + {"d", true}, + {"e", true}, + {"f", "f"}, + {"W", "W"}, + {"X", "X"}, + {"Y", "Y"}, + {"Z", "Z"}}}}; + ASSERT_DOCUMENT_EQ(result, expectedResult); +} + +// Verify that adding nested fields preserves the addition order in the spec. +TEST(ParsedAddFieldsExecutionTest, AddsNestedAddedFieldsInOrderSpecified) { + ParsedAddFields addition; + addition.parse(BSON("b.d" + << "FIRST" + << "b.c" + << "SECOND")); + auto result = addition.applyProjection(Document{}); + auto expectedResult = Document{{"b", Document{{"d", "FIRST"}, {"c", "SECOND"}}}}; + ASSERT_DOCUMENT_EQ(result, expectedResult); +} + +// +// Misc/Metadata. +// + +// Verify that the metadata is kept from the original input document. 
+TEST(ParsedAddFieldsExecutionTest, AlwaysKeepsMetadataFromOriginalDoc) { + ParsedAddFields addition; + addition.parse(BSON("a" << true)); + + MutableDocument inputDocBuilder(Document{{"a", 1}}); + inputDocBuilder.setRandMetaField(1.0); + inputDocBuilder.setTextScore(10.0); + Document inputDoc = inputDocBuilder.freeze(); + + auto result = addition.applyProjection(inputDoc); + + MutableDocument expectedDoc(Document{{"a", true}}); + expectedDoc.copyMetaDataFrom(inputDoc); + ASSERT_DOCUMENT_EQ(result, expectedDoc.freeze()); +} + +} // namespace +} // namespace parsed_aggregation_projection +} // namespace mongo diff --git a/src/mongo/db/pipeline/parsed_aggregation_projection.cpp b/src/mongo/db/pipeline/parsed_aggregation_projection.cpp index 2beceb4889e..c0a53c8a23b 100644 --- a/src/mongo/db/pipeline/parsed_aggregation_projection.cpp +++ b/src/mongo/db/pipeline/parsed_aggregation_projection.cpp @@ -28,12 +28,12 @@ #include "mongo/platform/basic.h" +#include "mongo/db/pipeline/parsed_aggregation_projection.h" + #include <boost/optional.hpp> #include <string> #include <unordered_set> -#include "mongo/db/pipeline/parsed_aggregation_projection.h" - #include "mongo/bson/bsonelement.h" #include "mongo/bson/bsonobj.h" #include "mongo/db/matcher/expression_algo.h" @@ -46,60 +46,126 @@ namespace mongo { namespace parsed_aggregation_projection { +// +// ProjectionSpecValidator +// + +Status ProjectionSpecValidator::validate(const BSONObj& spec) { + return ProjectionSpecValidator(spec).validate(); +} + +Status ProjectionSpecValidator::ensurePathDoesNotConflictOrThrow(StringData path) { + for (auto&& seenPath : _seenPaths) { + if ((path == seenPath) || (expression::isPathPrefixOf(path, seenPath)) || + (expression::isPathPrefixOf(seenPath, path))) { + return Status(ErrorCodes::FailedToParse, + str::stream() << "specification contains two conflicting paths. 
" + "Cannot specify both '" + << path + << "' and '" + << seenPath + << "': " + << _rawObj.toString(), + 40176); + } + } + _seenPaths.emplace_back(path.toString()); + return Status::OK(); +} + +Status ProjectionSpecValidator::validate() { + if (_rawObj.isEmpty()) { + return Status( + ErrorCodes::FailedToParse, "specification must have at least one field", 40177); + } + for (auto&& elem : _rawObj) { + Status status = parseElement(elem, FieldPath(elem.fieldName())); + if (!status.isOK()) + return status; + } + return Status::OK(); +} + +Status ProjectionSpecValidator::parseElement(const BSONElement& elem, const FieldPath& pathToElem) { + if (elem.type() == BSONType::Object) { + return parseNestedObject(elem.Obj(), pathToElem); + } + return ensurePathDoesNotConflictOrThrow(pathToElem.fullPath()); +} + +Status ProjectionSpecValidator::parseNestedObject(const BSONObj& thisLevelSpec, + const FieldPath& prefix) { + if (thisLevelSpec.isEmpty()) { + return Status(ErrorCodes::FailedToParse, + str::stream() + << "an empty object is not a valid value. Found empty object at path " + << prefix.fullPath(), + 40180); + } + for (auto&& elem : thisLevelSpec) { + auto fieldName = elem.fieldNameStringData(); + if (fieldName[0] == '$') { + // This object is an expression specification like {$add: [...]}. It will be parsed + // into an Expression later, but for now, just track that the prefix has been + // specified and skip it. + if (thisLevelSpec.nFields() != 1) { + return Status(ErrorCodes::FailedToParse, + str::stream() << "an expression specification must contain exactly " + "one field, the name of the expression. 
Found " + << thisLevelSpec.nFields() + << " fields in " + << thisLevelSpec.toString() + << ", while parsing object " + << _rawObj.toString(), + 40181); + } + Status status = ensurePathDoesNotConflictOrThrow(prefix.fullPath()); + if (!status.isOK()) + return status; + continue; + } + if (fieldName.find('.') != std::string::npos) { + return Status(ErrorCodes::FailedToParse, + str::stream() << "cannot use dotted field name '" << fieldName + << "' in a sub object: " + << _rawObj.toString(), + 40183); + } + Status status = + parseElement(elem, FieldPath::getFullyQualifiedPath(prefix.fullPath(), fieldName)); + if (!status.isOK()) + return status; + } + return Status::OK(); +} + namespace { /** - * This class is responsible for determining if the provided specification is valid, and determining - * whether it specifies an inclusion projection or an exclusion projection. + * This class is responsible for determining what type of $project stage it specifies. */ -class ProjectSpecTypeParser { +class ProjectTypeParser { public: /** - * Parses 'spec' to determine whether it is an inclusion or exclusion projection. - * - * Throws a UserException if the specification is invalid. + * Parses 'spec' to determine whether it is an inclusion or exclusion projection. 'Computed' + * fields (ones which are defined by an expression or a literal) are treated as inclusion + * projections in the context of the $project stage. */ static ProjectionType parse(const BSONObj& spec) { - ProjectSpecTypeParser parser(spec); + ProjectTypeParser parser(spec); parser.parse(); invariant(parser._parsedType); return *(parser._parsedType); } private: - ProjectSpecTypeParser(const BSONObj& spec) : _rawObj(spec) {} - - /** - * Uses '_seenPaths' to see if 'path' conflicts with any paths that have already been specified. - * - * For example, a user is not allowed to specify {'a': 1, 'a.b': 1}, or some similar conflicting - * paths. 
- */ - void ensurePathDoesNotConflictOrThrow(StringData path) { - for (auto&& seenPath : _seenPaths) { - uassert(40176, - str::stream() << "$project specification contains two conflicting paths. " - "Cannot specify both '" - << path - << "' and '" - << seenPath - << "': " - << _rawObj.toString(), - path != seenPath && !expression::isPathPrefixOf(path, seenPath) && - !expression::isPathPrefixOf(seenPath, path)); - } - _seenPaths.insert(path.toString()); - } + ProjectTypeParser(const BSONObj& spec) : _rawObj(spec) {} /** * Traverses '_rawObj' to determine the type of projection, populating '_parsedType' in the * process. - * - * Throws a UserException if an invalid projection specification is detected. */ void parse() { - uassert(40177, "$project specification must have at least one field", !_rawObj.isEmpty()); - size_t nFields = 0; for (auto&& elem : _rawObj) { parseElement(elem, FieldPath(elem.fieldName())); @@ -124,11 +190,10 @@ private: /** * Parses a single BSONElement. 'pathToElem' should include the field name of 'elem'. * - * Delegates to parseSubObject() if 'elem' is an object. Otherwise adds the full path to 'elem' - * to '_seenPaths', and updates '_parsedType' if appropriate. + * Delegates to parseNestedObject() if 'elem' is an object. Otherwise updates '_parsedType' if + * appropriate. * - * Throws a UserException if the path to 'elem' conflicts with a path that has already been - * specified, or if this element represents a mix of projection types. + * Throws a UserException if this element represents a mix of projection types. */ void parseElement(const BSONElement& elem, const FieldPath& pathToElem) { if (elem.type() == BSONType::Object) { @@ -157,55 +222,29 @@ private: !_parsedType || (*_parsedType == ProjectionType::kInclusion)); _parsedType = ProjectionType::kInclusion; } - ensurePathDoesNotConflictOrThrow(pathToElem.fullPath()); } /** * Traverses 'thisLevelSpec', parsing each element in turn. 
* - * Throws a UserException if any paths conflict with each other or existing paths, - * 'thisLevelSpec' contains a dotted path, or if 'thisLevelSpec' represents an invalid - * expression. + * Throws a UserException if 'thisLevelSpec' represents an invalid mix of projections. */ void parseNestedObject(const BSONObj& thisLevelSpec, const FieldPath& prefix) { - uassert(40180, - str::stream() << "an empty object is not a valid value in a $project. Found " - "empty object at path " - << prefix.fullPath(), - !thisLevelSpec.isEmpty()); - for (auto elem : thisLevelSpec) { + for (auto&& elem : thisLevelSpec) { auto fieldName = elem.fieldNameStringData(); if (fieldName[0] == '$') { // This object is an expression specification like {$add: [...]}. It will be parsed // into an Expression later, but for now, just track that the prefix has been // specified and skip it. - uassert(40181, - str::stream() - << "Bad projection specification: An expression specification must " - "contain exactly one field, the name of the expression. Found " - << thisLevelSpec.nFields() - << " fields in " - << thisLevelSpec.toString() - << ", while parsing $project object " - << _rawObj.toString(), - thisLevelSpec.nFields() == 1); uassert(40182, str::stream() << "Bad projection specification, cannot include fields or " "add computed fields during an exclusion projection: " << _rawObj.toString(), !_parsedType || _parsedType == ProjectionType::kInclusion); _parsedType = ProjectionType::kInclusion; - ensurePathDoesNotConflictOrThrow(prefix.fullPath()); continue; } - - uassert(40183, - str::stream() << "cannot use dotted field name '" << fieldName - << "' in a sub object of a $project stage: " - << _rawObj.toString(), - fieldName.find('.') == std::string::npos); - parseElement(elem, FieldPath::getFullyQualifiedPath(prefix.fullPath(), fieldName)); } } @@ -215,17 +254,26 @@ private: // This will be populated during parse(). 
boost::optional<ProjectionType> _parsedType; - - // Tracks which paths we've seen to ensure no two paths conflict with each other. - std::unordered_set<std::string> _seenPaths; }; } // namespace std::unique_ptr<ParsedAggregationProjection> ParsedAggregationProjection::create( const BSONObj& spec) { + // Check that the specification was valid. Status returned is unspecific because validate() + // is used by the $addFields stage as well as $project. + // If there was an error, uassert with a $project-specific message. + Status status = ProjectionSpecValidator::validate(spec); + if (!status.isOK()) { + uasserted(status.location(), + str::stream() << "Invalid $project specification: " << status.reason()); + } + // Check for any conflicting specifications, and determine the type of the projection. - auto projectionType = ProjectSpecTypeParser::parse(spec); + auto projectionType = ProjectTypeParser::parse(spec); + // kComputed is a projection type reserved for $addFields, and should never be detected by the + // ProjectTypeParser. + invariant(projectionType != ProjectionType::kComputed); // We can't use make_unique() here, since the branches have different types. 
std::unique_ptr<ParsedAggregationProjection> parsedProject( diff --git a/src/mongo/db/pipeline/parsed_aggregation_projection.h b/src/mongo/db/pipeline/parsed_aggregation_projection.h index a3420fdb954..6e8d9100845 100644 --- a/src/mongo/db/pipeline/parsed_aggregation_projection.h +++ b/src/mongo/db/pipeline/parsed_aggregation_projection.h @@ -28,26 +28,89 @@ #pragma once +#include "mongo/platform/basic.h" + #include <boost/intrusive_ptr.hpp> #include <memory> +#include "mongo/bson/bsonelement.h" +#include "mongo/db/pipeline/document_source.h" +#include "mongo/db/pipeline/field_path.h" + namespace mongo { class BSONObj; class Document; -struct DepsTracker; struct ExpressionContext; namespace parsed_aggregation_projection { -enum class ProjectionType { kExclusion, kInclusion }; +enum class ProjectionType { kExclusion, kInclusion, kComputed }; + +/** + * This class ensures that the specification was valid: that none of the paths specified conflict + * with one another, that there is at least one field, etc. Here "projection" includes both + * $project specifications and $addFields specifications. + */ +class ProjectionSpecValidator { +public: + /** + * Returns a Status: either a Status::OK() if the specification is valid for a projection, or a + * non-OK Status, error number, and message with why not. + */ + static Status validate(const BSONObj& spec); + +private: + ProjectionSpecValidator(const BSONObj& spec) : _rawObj(spec) {} + + /** + * Uses '_seenPaths' to see if 'path' conflicts with any paths that have already been specified. + * + * For example, a user is not allowed to specify {'a': 1, 'a.b': 1}, or some similar conflicting + * paths. + */ + Status ensurePathDoesNotConflictOrThrow(StringData path); + + /** + * Returns the relevant error if an invalid projection specification is detected. + */ + Status validate(); + + /** + * Parses a single BSONElement. 'pathToElem' should include the field name of 'elem'. 
+ * + * Delegates to parseNestedObject() if 'elem' is an object. Otherwise adds the full path to 'elem' + * to '_seenPaths'. + * + * Calls ensurePathDoesNotConflictOrThrow with the path to this element, which returns a non-OK + * Status for conflicting path specifications. + */ + Status parseElement(const BSONElement& elem, const FieldPath& pathToElem); + + /** + * Traverses 'thisLevelSpec', parsing each element in turn. + * + * Returns a non-OK Status if any paths conflict with each other or existing paths, + * 'thisLevelSpec' contains a dotted path, or if 'thisLevelSpec' represents an invalid + * expression. + */ + Status parseNestedObject(const BSONObj& thisLevelSpec, const FieldPath& prefix); + + // The original object. Used to generate more helpful error messages. + const BSONObj& _rawObj; + + // Tracks which paths we've seen to ensure no two paths conflict with each other. + // Can be a vector since we iterate through it. + std::vector<std::string> _seenPaths; +}; /** - * A ParsedAggregationProjection is responsible for parsing and executing a projection. It + * A ParsedAggregationProjection is responsible for parsing and executing a $project. It * represents either an inclusion or exclusion projection. This is the common interface between the * two types of projections. */ -class ParsedAggregationProjection { +class ParsedAggregationProjection + : public DocumentSourceSingleDocumentTransformation::TransformerInterface { public: /** * Main entry point for a ParsedAggregationProjection. @@ -72,11 +135,6 @@ public: virtual void parse(const BSONObj& spec) = 0; /** - * Serialize this projection. - */ - virtual Document serialize(bool explain = false) const = 0; - - /** * Optimize any expressions contained within this projection. */ virtual void optimize() {} @@ -89,15 +147,24 @@ public: /** * Add any dependencies needed by this projection or any sub-expressions to 'deps'. 
*/ - virtual void addDependencies(DepsTracker* deps) const {} + virtual DocumentSource::GetDepsReturn addDependencies(DepsTracker* deps) const { + return DocumentSource::NOT_SUPPORTED; + } /** - * Apply the projection to 'input'. + * Apply the projection transformation. */ - virtual Document applyProjection(Document input) const = 0; + Document applyTransformation(Document input) { + return applyProjection(input); + } protected: ParsedAggregationProjection() = default; + + /** + * Apply the projection to 'input'. + */ + virtual Document applyProjection(Document input) const = 0; }; } // namespace parsed_aggregation_projection } // namespace mongo diff --git a/src/mongo/db/pipeline/parsed_exclusion_projection.h b/src/mongo/db/pipeline/parsed_exclusion_projection.h index 540632f3867..c51ccd754cf 100644 --- a/src/mongo/db/pipeline/parsed_exclusion_projection.h +++ b/src/mongo/db/pipeline/parsed_exclusion_projection.h @@ -116,6 +116,10 @@ public: */ Document applyProjection(Document inputDoc) const final; + DocumentSource::GetDepsReturn addDependencies(DepsTracker* deps) const { + return DocumentSource::SEE_NEXT; + } + private: /** * Helper for parse() above. 
diff --git a/src/mongo/db/pipeline/parsed_inclusion_projection.h b/src/mongo/db/pipeline/parsed_inclusion_projection.h index d762d6d7ede..e72e334f837 100644 --- a/src/mongo/db/pipeline/parsed_inclusion_projection.h +++ b/src/mongo/db/pipeline/parsed_inclusion_projection.h @@ -209,8 +209,9 @@ public: _root->injectExpressionContext(expCtx); } - void addDependencies(DepsTracker* deps) const final { + DocumentSource::GetDepsReturn addDependencies(DepsTracker* deps) const final { _root->addDependencies(deps); + return DocumentSource::EXHAUSTIVE_FIELDS; } /** diff --git a/src/mongo/db/pipeline/pipeline.cpp b/src/mongo/db/pipeline/pipeline.cpp index a4f1325933f..8918addf200 100644 --- a/src/mongo/db/pipeline/pipeline.cpp +++ b/src/mongo/db/pipeline/pipeline.cpp @@ -336,8 +336,9 @@ void Pipeline::Optimizations::Sharded::limitFieldsSentFromShardsToMerger(Pipelin return; } // if we get here, add the project. - shardPipe->_sources.push_back(DocumentSourceProject::createFromBson( - BSON("$project" << mergeDeps.toProjection()).firstElement(), shardPipe->pCtx)); + boost::intrusive_ptr<DocumentSource> project = DocumentSourceProject::create( + BSON("$project" << mergeDeps.toProjection()).firstElement(), shardPipe->pCtx); + shardPipe->_sources.push_back(project); } BSONObj Pipeline::getInitialQuery() const { |