diff options
author | Nick Zolnierz <nicholas.zolnierz@mongodb.com> | 2019-08-06 11:55:16 -0400 |
---|---|---|
committer | Nick Zolnierz <nicholas.zolnierz@mongodb.com> | 2019-08-07 13:44:43 -0400 |
commit | 370187d7d9169eeac4065b9b60b8f74c331eae67 (patch) | |
tree | f635fe4e03d54207b07ebdc722ff20d4d08987b0 | |
parent | d7455e67f5b002de110f7972603906888aff66b8 (diff) | |
download | mongo-370187d7d9169eeac4065b9b60b8f74c331eae67.tar.gz |
SERVER-42507 Add internal javascript emit aggregation expression
-rw-r--r-- | jstests/aggregation/expressions/internal_js_emit.js | 88 | ||||
-rw-r--r-- | src/mongo/db/pipeline/SConscript | 13 | ||||
-rw-r--r-- | src/mongo/db/pipeline/expression_javascript.cpp | 145 | ||||
-rw-r--r-- | src/mongo/db/pipeline/expression_javascript.h | 76 | ||||
-rw-r--r-- | src/mongo/db/pipeline/expression_visitor.h | 2 | ||||
-rw-r--r-- | src/mongo/db/pipeline/javascript_execution.h | 67 | ||||
-rw-r--r-- | src/mongo/db/pipeline/mongo_process_interface.h | 11 | ||||
-rw-r--r-- | src/mongo/db/pipeline/mongos_process_interface.h | 7 | ||||
-rw-r--r-- | src/mongo/db/pipeline/pipeline.cpp | 1 | ||||
-rw-r--r-- | src/mongo/db/pipeline/process_interface_standalone.h | 22 | ||||
-rw-r--r-- | src/mongo/db/pipeline/stub_mongo_process_interface.h | 5 |
11 files changed, 437 insertions, 0 deletions
diff --git a/jstests/aggregation/expressions/internal_js_emit.js b/jstests/aggregation/expressions/internal_js_emit.js new file mode 100644 index 00000000000..5a3fed72e88 --- /dev/null +++ b/jstests/aggregation/expressions/internal_js_emit.js @@ -0,0 +1,88 @@ +// Tests basic functionality of the $_internalJsEmit expression, which provides capability for the +// map stage of MapReduce. +(function() { +"use strict"; + +load('jstests/aggregation/extras/utils.js'); + +const coll = db.js_emit_expr; +coll.drop(); + +function fmap() { + for (let word of this.text.split(' ')) { + emit(word, 1); + } +} + +let pipeline = [ + { + $project: { + emits: { + $_internalJsEmit: { + 'this': '$$ROOT', + 'eval': fmap, + }, + }, + _id: 0, + } + }, + {$unwind: '$emits'}, + {$replaceRoot: {newRoot: '$emits'}} +]; + +assert.commandWorked(coll.insert({text: 'hello world'})); + +let results = coll.aggregate(pipeline, {cursor: {batchSize: 1}}).toArray(); +assert(resultsEq(results, [{k: "hello", v: 1}, {k: "world", v: 1}]), results); + +assert.commandWorked(coll.insert({text: 'mongo db'})); + +// Set batchSize to 1 to check that the expression is able to run across getMore's. +results = coll.aggregate(pipeline, {cursor: {batchSize: 1}}).toArray(); +assert(resultsEq(results, + [{k: 'hello', v: 1}, {k: 'world', v: 1}, {k: 'mongo', v: 1}, {k: 'db', v: 1}]), + results); + +// Test that the 'eval' function accepts a string argument. +pipeline[0].$project.emits.$_internalJsEmit.eval = fmap.toString(); +results = coll.aggregate(pipeline, {cursor: {batchSize: 1}}).toArray(); +assert(resultsEq(results, + [{k: 'hello', v: 1}, {k: 'world', v: 1}, {k: 'mongo', v: 1}, {k: 'db', v: 1}]), + results); + +// Test that the command correctly fails for an invalid operation within the JS function. +assert.commandWorked(coll.insert({text: 5})); +assert.commandFailedWithCode( + db.runCommand({aggregate: coll.getName(), pipeline: pipeline, cursor: {}}), + ErrorCodes.JSInterpreterFailure); + +// Test that the command correctly fails for invalid arguments. +pipeline = [{ + $project: { + emits: { + $_internalJsEmit: { + 'this': 'must evaluate to an object', + 'eval': fmap, + }, + }, + _id: 0, + } +}]; +assert.commandFailedWithCode( + db.runCommand({aggregate: coll.getName(), pipeline: pipeline, cursor: {}}), 31225); + +pipeline = [{ + $project: { + emits: { + $_internalJsEmit: { + 'this': '$$ROOT', + 'eval': 'this is not a valid function!', + }, + }, + _id: 0, + } +}]; +assert.commandFailedWithCode( + db.runCommand({aggregate: coll.getName(), pipeline: pipeline, cursor: {}}), + ErrorCodes.JSInterpreterFailure); +})();
\ No newline at end of file diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript index aae909141c1..d836cf94e00 100644 --- a/src/mongo/db/pipeline/SConscript +++ b/src/mongo/db/pipeline/SConscript @@ -117,6 +117,17 @@ env.Library( ) env.Library( + target='expression_javascript', + source=[ + 'expression_javascript.cpp' + ], + LIBDEPS=[ + '$BUILD_DIR/mongo/scripting/scripting_common', + 'expression', + ] +) + +env.Library( target='accumulator', source=[ 'accumulation_statement.cpp', @@ -228,6 +239,7 @@ env.Library( '$BUILD_DIR/mongo/db/session_catalog', '$BUILD_DIR/mongo/db/storage/backup_cursor_hooks', '$BUILD_DIR/mongo/db/transaction', + '$BUILD_DIR/mongo/scripting/scripting_common', ], ) @@ -357,6 +369,7 @@ pipelineEnv.Library( 'document_value', 'expression', 'expression_context', + 'expression_javascript', 'granularity_rounder', 'parsed_aggregation_projection', ], diff --git a/src/mongo/db/pipeline/expression_javascript.cpp b/src/mongo/db/pipeline/expression_javascript.cpp new file mode 100644 index 00000000000..869b6d7d9ff --- /dev/null +++ b/src/mongo/db/pipeline/expression_javascript.cpp @@ -0,0 +1,145 @@ +/** + * Copyright (C) 2019-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/auth/authorization_session.h" +#include "mongo/db/commands/test_commands_enabled.h" +#include "mongo/db/pipeline/expression_javascript.h" + +namespace mongo { + +REGISTER_EXPRESSION(_internalJsEmit, ExpressionInternalJsEmit::parse); + +namespace { + +/** + * This function is called from the JavaScript function provided to the expression. Objects are + * converted from BSON to Document/Value after JS engine has run completely. + */ +BSONObj emitFromJS(const BSONObj& args, void* data) { + uassert(31220, "emit takes 2 args", args.nFields() == 2); + auto emitted = static_cast<std::vector<BSONObj>*>(data); + if (args.firstElement().type() == Undefined) { + emitted->push_back(BSON("k" << BSONNULL << "v" << args["1"])); + } else { + emitted->push_back(BSON("k" << args["0"] << "v" << args["1"])); + } + return BSONObj(); +} +} // namespace + +ExpressionInternalJsEmit::ExpressionInternalJsEmit( + const boost::intrusive_ptr<ExpressionContext>& expCtx, + boost::intrusive_ptr<Expression> thisRef, + std::string funcSource) + : Expression(expCtx, {std::move(thisRef)}), + _thisRef(_children[0]), + _funcSource(std::move(funcSource)) {} + +void ExpressionInternalJsEmit::_doAddDependencies(mongo::DepsTracker* deps) const { + _children[0]->addDependencies(deps); +} + +boost::intrusive_ptr<Expression> ExpressionInternalJsEmit::parse( + const boost::intrusive_ptr<ExpressionContext>& expCtx, + BSONElement expr, + const VariablesParseState& vps) { + + uassert(ErrorCodes::BadValue, + str::stream() << kExpressionName << " not allowed without enabling test commands.", + getTestCommandsEnabled()); + + uassert(31221, + str::stream() << kExpressionName + << " requires an object as an argument, found: " << typeName(expr.type()), + expr.type() == BSONType::Object); + + BSONElement evalField = expr["eval"]; + + uassert(31222, str::stream() << "The map function must be specified.", evalField); + uassert(31224, + "The map function must be of type string, code, or code w/ scope", + evalField.type() == BSONType::String || evalField.type() == BSONType::Code || + evalField.type() == BSONType::CodeWScope); + + std::string funcSourceString = evalField._asCode(); + boost::intrusive_ptr<Expression> thisRef = parseOperand(expCtx, expr["this"], vps); + + + uassert(31223, str::stream() << kExpressionName << " requires 'this' to be specified", thisRef); + + return new ExpressionInternalJsEmit(expCtx, std::move(thisRef), std::move(funcSourceString)); +} + +Value ExpressionInternalJsEmit::serialize(bool explain) const { + return Value( + Document{{kExpressionName, + Document{{"eval", _funcSource}, {"this", _thisRef->serialize(explain)}}}}); +} + +Value ExpressionInternalJsEmit::evaluate(const Document& root, Variables* variables) const { + auto expCtx = getExpressionContext(); + uassert(31234, + str::stream() << kExpressionName << " is not allowed to run on mongos", + !expCtx->inMongos); + + Value thisVal = _thisRef->evaluate(root, variables); + uassert(31225, "'this' must be an object.", thisVal.getType() == BSONType::Object); + + // If the scope does not exist and is created by the call to ExpressionContext::getJsExec(), + // then make sure to re-bind emit() and the given function to the new scope. + auto [jsExec, newlyCreated] = expCtx->mongoProcessInterface->getJsExec(); + if (newlyCreated) { + jsExec->getScope()->loadStored(expCtx->opCtx, true); + + const_cast<ExpressionInternalJsEmit*>(this)->_func = + jsExec->getScope()->createFunction(_funcSource.c_str()); + uassert(31226, "The map function did not evaluate", _func); + jsExec->getScope()->injectNative( + "emit", emitFromJS, &const_cast<ExpressionInternalJsEmit*>(this)->_emittedObjects); + } + + BSONObj thisBSON = thisVal.getDocument().toBson(); + BSONObj params; + jsExec->callFunctionWithoutReturn(_func, params, thisBSON); + + std::vector<Value> output; + + for (const BSONObj& obj : _emittedObjects) { + output.push_back(Value(std::move(obj))); + } + + // Need to const_cast here in order to clean out _emittedObjects which were added in the call to + // JS in this function. This is so _emittedObjects is empty again for the next JS invocation. + const_cast<ExpressionInternalJsEmit*>(this)->_emittedObjects.clear(); + + return Value{std::move(output)}; +} +} // namespace mongo diff --git a/src/mongo/db/pipeline/expression_javascript.h b/src/mongo/db/pipeline/expression_javascript.h new file mode 100644 index 00000000000..8de8831bc40 --- /dev/null +++ b/src/mongo/db/pipeline/expression_javascript.h @@ -0,0 +1,76 @@ +/** + * Copyright (C) 2019-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/pipeline/expression.h" +#include "mongo/db/pipeline/javascript_execution.h" + +/** + * This file contains all expressions which make use of JavaScript execution and depend on the JS + * engine to operate. + */ +namespace mongo { + +/** + * This expression takes in a JavaScript function and a "this" reference, and returns an array of + * key/value objects which are the results of calling emit() from the provided JS function. + */ +class ExpressionInternalJsEmit final : public Expression { +public: + Value evaluate(const Document& root, Variables* variables) const final; + static boost::intrusive_ptr<Expression> parse( + const boost::intrusive_ptr<ExpressionContext>& expCtx, + BSONElement expr, + const VariablesParseState& vps); + Value serialize(bool explain) const final; + + void acceptVisitor(ExpressionVisitor* visitor) final { + return visitor->visit(this); + } + +private: + const boost::intrusive_ptr<Expression>& _thisRef; + + ExpressionInternalJsEmit(const boost::intrusive_ptr<ExpressionContext>& expCtx, + boost::intrusive_ptr<Expression> thisRef, + std::string funcSourceString); + + // Vector needs to be a member on the class so that it can be attached to the JS scope in the + // constructor and stay alive through the lifetime of the Expression object. + std::vector<BSONObj> _emittedObjects{}; + + void _doAddDependencies(DepsTracker* deps) const final override; + + std::string _funcSource; + ScriptingFunction _func; + + static constexpr auto kExpressionName = "$_internalJsEmit"_sd; +}; +} // namespace mongo diff --git a/src/mongo/db/pipeline/expression_visitor.h b/src/mongo/db/pipeline/expression_visitor.h index 0084df6e77f..177eefc7e28 100644 --- a/src/mongo/db/pipeline/expression_visitor.h +++ b/src/mongo/db/pipeline/expression_visitor.h @@ -139,6 +139,7 @@ class ExpressionHyperbolicCosine; class ExpressionHyperbolicSine; class ExpressionDegreesToRadians; class ExpressionRadiansToDegrees; +class ExpressionInternalJsEmit; class AccumulatorAvg; class AccumulatorMax; @@ -275,6 +276,7 @@ public: virtual void visit(ExpressionFromAccumulator<AccumulatorSum>*) = 0; virtual void visit(ExpressionFromAccumulator<AccumulatorMergeObjects>*) = 0; virtual void visit(ExpressionTests::Testable*) = 0; + virtual void visit(ExpressionInternalJsEmit*) = 0; }; } // namespace mongo diff --git a/src/mongo/db/pipeline/javascript_execution.h b/src/mongo/db/pipeline/javascript_execution.h new file mode 100644 index 00000000000..2b326eb5c30 --- /dev/null +++ b/src/mongo/db/pipeline/javascript_execution.h @@ -0,0 +1,67 @@ +/** + * Copyright (C) 2019-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/client.h" +#include "mongo/db/pipeline/value.h" +#include "mongo/scripting/engine.h" + +namespace mongo { +/* + * This class provides a more sensible interface with JavaScript Scope objects. It helps with + * boilerplate related to calling JS functions from C++ code, and extracting BSON objects from the + * JS engine. + */ +class JsExecution { +public: + /** + * Construct with a thread-local scope. + */ + JsExecution() : _scope(nullptr) { + _scope.reset(getGlobalScriptEngine()->newScopeForCurrentThread()); + } + + void callFunctionWithoutReturn(ScriptingFunction func, + const BSONObj& params, + const BSONObj& thisObj) { + _scope->registerOperation(Client::getCurrent()->getOperationContext()); + const auto guard = makeGuard([&] { _scope->unregisterOperation(); }); + + _scope->invoke(func, ¶ms, &thisObj, 0, true); + } + + Scope* getScope() { + return _scope.get(); + } + +private: + std::unique_ptr<Scope> _scope; +}; +} // namespace mongo diff --git a/src/mongo/db/pipeline/mongo_process_interface.h b/src/mongo/db/pipeline/mongo_process_interface.h index 503a4065cb6..1899cf08053 100644 --- a/src/mongo/db/pipeline/mongo_process_interface.h +++ b/src/mongo/db/pipeline/mongo_process_interface.h @@ -58,6 +58,7 @@ namespace mongo { class ShardFilterer; class ExpressionContext; +class JsExecution; class Pipeline; class PipelineDeleter; class TransactionHistoryIteratorBase; @@ -405,6 +406,16 @@ public: boost::optional<std::vector<std::string>> fields, boost::optional<ChunkVersion> targetCollectionVersion, const NamespaceString& outputNs) const = 0; + + /** + * Create or get a pointer to a JsExecution instance, capable of invoking Javascript functions + * and reading the return value. + * + * Returns a pointer to a JsExecution and a boolean to indicate whether the JS Scope was newly + * created. + */ + virtual std::pair<JsExecution*, bool> getJsExec() = 0; + virtual void releaseJsExec() = 0; }; } // namespace mongo diff --git a/src/mongo/db/pipeline/mongos_process_interface.h b/src/mongo/db/pipeline/mongos_process_interface.h index eaf418a1153..50d1149b3b9 100644 --- a/src/mongo/db/pipeline/mongos_process_interface.h +++ b/src/mongo/db/pipeline/mongos_process_interface.h @@ -239,6 +239,13 @@ public: boost::optional<ChunkVersion> targetCollectionVersion, const NamespaceString& outputNs) const override; + std::pair<JsExecution*, bool> getJsExec() override { + // Javascript engine is not support on mongos. + MONGO_UNREACHABLE; + } + + void releaseJsExec() override {} + protected: BSONObj _reportCurrentOpForClient(OperationContext* opCtx, Client* client, diff --git a/src/mongo/db/pipeline/pipeline.cpp b/src/mongo/db/pipeline/pipeline.cpp index 3337a79e4b8..355bc8a429a 100644 --- a/src/mongo/db/pipeline/pipeline.cpp +++ b/src/mongo/db/pipeline/pipeline.cpp @@ -320,6 +320,7 @@ bool Pipeline::aggHasWriteStage(const BSONObj& cmd) { void Pipeline::detachFromOperationContext() { pCtx->opCtx = nullptr; pCtx->mongoProcessInterface->setOperationContext(nullptr); + pCtx->mongoProcessInterface->releaseJsExec(); for (auto&& source : _sources) { source->detachFromOperationContext(); diff --git a/src/mongo/db/pipeline/process_interface_standalone.h b/src/mongo/db/pipeline/process_interface_standalone.h index 392af0beb5e..e367df574dd 100644 --- a/src/mongo/db/pipeline/process_interface_standalone.h +++ b/src/mongo/db/pipeline/process_interface_standalone.h @@ -33,6 +33,7 @@ #include "mongo/db/exec/shard_filterer.h" #include "mongo/db/ops/write_ops_exec.h" #include "mongo/db/ops/write_ops_gen.h" +#include "mongo/db/pipeline/javascript_execution.h" #include "mongo/db/pipeline/mongo_process_common.h" #include "mongo/db/pipeline/pipeline.h" @@ -153,6 +154,22 @@ public: boost::optional<ChunkVersion> targetCollectionVersion, const NamespaceString& outputNs) const override; + /** + * If we are creating a new JsExecution (and therefore a new thread-local scope), make sure + * we pass that information back to the caller. + */ + std::pair<JsExecution*, bool> getJsExec() override { + if (!_jsExec) { + _jsExec = std::make_unique<JsExecution>(); + return {_jsExec.get(), true}; + } + return {_jsExec.get(), false}; + } + + void releaseJsExec() override { + _jsExec.reset(); + } + protected: BSONObj _reportCurrentOpForClient(OperationContext* opCtx, Client* client, @@ -193,6 +210,11 @@ private: DBDirectClient _client; std::map<UUID, std::unique_ptr<const CollatorInterface>> _collatorCache; + + // Object which contains a JavaScript Scope, used for executing JS in pipeline stages and + // expressions. Owned by the process interface so that there is one common scope for the + // lifetime of a pipeline. + std::unique_ptr<JsExecution> _jsExec; }; } // namespace mongo diff --git a/src/mongo/db/pipeline/stub_mongo_process_interface.h b/src/mongo/db/pipeline/stub_mongo_process_interface.h index 147be45fc46..23a90059f13 100644 --- a/src/mongo/db/pipeline/stub_mongo_process_interface.h +++ b/src/mongo/db/pipeline/stub_mongo_process_interface.h @@ -241,5 +241,10 @@ public: } return {fieldPaths, targetCollectionVersion}; } + + std::pair<JsExecution*, bool> getJsExec() { + MONGO_UNREACHABLE; + } + void releaseJsExec() {} }; } // namespace mongo |