diff options
author | Arun Banala <arun.banala@mongodb.com> | 2019-04-23 12:31:41 +0100 |
---|---|---|
committer | Arun Banala <arun.banala@mongodb.com> | 2019-04-30 11:13:19 +0100 |
commit | 7ea3f16dc509b9b0cf03775fd7fcd153a3de7d7f (patch) | |
tree | 5a35007eb9a2b2ffc376a2e97cbbded14acfc44a /src/mongo/db/pipeline | |
parent | 3d2cb8571b68999c10a5fab5bd0e8b9e5e498337 (diff) | |
download | mongo-7ea3f16dc509b9b0cf03775fd7fcd153a3de7d7f.tar.gz |
SERVER-40584 Regex agg expressions should not inherit from ExpressionFixedArity
Diffstat (limited to 'src/mongo/db/pipeline')
-rw-r--r-- | src/mongo/db/pipeline/expression.cpp | 209 | ||||
-rw-r--r-- | src/mongo/db/pipeline/expression.h | 133 | ||||
-rw-r--r-- | src/mongo/db/pipeline/expression_test.cpp | 68 |
3 files changed, 225 insertions, 185 deletions
diff --git a/src/mongo/db/pipeline/expression.cpp b/src/mongo/db/pipeline/expression.cpp index ec0a0d8f435..0084f870d30 100644 --- a/src/mongo/db/pipeline/expression.cpp +++ b/src/mongo/db/pipeline/expression.cpp @@ -5669,64 +5669,42 @@ Value ExpressionConvert::performConversion(BSONType targetType, Value inputValue BSONType inputType = inputValue.getType(); return table.findConversionFunc(inputType, targetType)(getExpressionContext(), inputValue); } -namespace { - -boost::optional<Value> extractValueFromConstantExpression( - const std::string& fieldName, - const std::vector<std::pair<std::string, boost::intrusive_ptr<Expression>>>& childExpressions) { - // Find the element with the fieldName. - auto expressionPairItr = std::find_if( - childExpressions.begin(), childExpressions.end(), [&](const auto& childExpression) { - return childExpression.first == fieldName; - }); - - // If the field doesn't exists it is still eligible for optimization. - if (expressionPairItr == childExpressions.end()) { - return Value(BSONNULL); - } - // If the field exists and not null/constant, we cannot optimize. - if (!ExpressionConstant::isNullOrConstant(expressionPairItr->second)) { - return boost::none; - } - auto* expression = expressionPairItr->second.get(); - return dynamic_cast<ExpressionConstant*>(expression)->getValue(); -} +/* -------------------------- ExpressionRegex ------------------------------ */ -} // namespace +ExpressionRegex::ExpressionRegex(const boost::intrusive_ptr<ExpressionContext>& expCtx, + BSONElement expr, + const VariablesParseState& vpsIn, + const std::string& opName) + : Expression(expCtx), _opName(std::move(opName)) { + uassert(51103, + str::stream() << _opName << " expects an object of named arguments but found: " + << expr.type(), + expr.type() == BSONType::Object); -void RegexMatchHandler::optimize(boost::intrusive_ptr<Expression> expression) { - auto optimizedExpr = expression->optimize(); - - // If 'input', 'regex' and 'options' are null/constant then 'optimize()' will convert the - // object to an expression of type 'ExpressionConstant'. - if (auto* exprObj = dynamic_cast<ExpressionConstant*>(optimizedExpr.get())) { - _initialExecStateForConstantRegex = buildInitialState(exprObj->getValue()); - } else if (auto* exprObj = dynamic_cast<ExpressionObject*>(optimizedExpr.get())) { - // Extract the children and check for constant 'regex' and 'options'. - auto& children = exprObj->getChildExpressions(); - auto regex = extractValueFromConstantExpression("regex", children); - auto options = extractValueFromConstantExpression("options", children); - - // If both 'regex' and 'options' are null/constant, we can pre-compile the execution state. - if (regex && options) { - RegexExecutionState executionState; - _extractRegexAndOptions(&executionState, *regex, *options); - _compile(&executionState); - _initialExecStateForConstantRegex = std::move(executionState); + for (auto&& elem : expr.embeddedObject()) { + const auto field = elem.fieldNameStringData(); + if (field == "input"_sd) { + _input = parseOperand(expCtx, elem, vpsIn); + } else if (field == "regex"_sd) { + _regex = parseOperand(expCtx, elem, vpsIn); + } else if (field == "options"_sd) { + _options = parseOperand(expCtx, elem, vpsIn); + } else { + uasserted(31024, + str::stream() << _opName << " found an unknown argument: " + << elem.fieldNameStringData()); } } + uassert(31022, str::stream() << _opName << " requires 'input' parameter", _input); + uassert(31023, str::stream() << _opName << " requires 'regex' parameter", _regex); } -RegexMatchHandler::RegexExecutionState RegexMatchHandler::buildInitialState( - const Value& inputExpr) const { - uassert(51103, - str::stream() << "expression expects an object of named arguments, but found type " - << inputExpr.getType(), - inputExpr.getType() == BSONType::Object); - Value textInput = inputExpr.getDocument().getField("input"); - Value regexPattern = inputExpr.getDocument().getField("regex"); - Value regexOptions = inputExpr.getDocument().getField("options"); +ExpressionRegex::RegexExecutionState ExpressionRegex::buildInitialState( + const Document& root) const { + Value textInput = _input->evaluate(root); + Value regexPattern = _regex->evaluate(root); + Value regexOptions = _options ? _options->evaluate(root) : Value(BSONNULL); auto executionState = _initialExecStateForConstantRegex.value_or(RegexExecutionState()); @@ -5744,7 +5722,7 @@ RegexMatchHandler::RegexExecutionState RegexMatchHandler::buildInitialState( return executionState; } -int RegexMatchHandler::execute(RegexExecutionState* regexState) const { +int ExpressionRegex::execute(RegexExecutionState* regexState) const { invariant(regexState); invariant(!regexState->nullish()); invariant(regexState->pcrePtr); @@ -5761,13 +5739,14 @@ int RegexMatchHandler::execute(RegexExecutionState* regexState) const { // negative (other than -1) if there is an error during execution, and zero if capturesBuffer's // capacity is not sufficient to hold all the results. The latter scenario should never occur. uassert(51156, - str::stream() << "Error occurred while executing the regular expression. Result code:" + str::stream() << "Error occurred while executing the regular expression in " << _opName + << ". Result code: " << execResult, execResult == -1 || execResult == (regexState->numCaptures + 1)); return execResult; } -Value RegexMatchHandler::nextMatch(RegexExecutionState* regexState) const { +Value ExpressionRegex::nextMatch(RegexExecutionState* regexState) const { int execResult = execute(regexState); // No match. @@ -5816,13 +5795,34 @@ Value RegexMatchHandler::nextMatch(RegexExecutionState* regexState) const { return match.freezeToValue(); } -void RegexMatchHandler::_compile(RegexExecutionState* executionState) const { +boost::intrusive_ptr<Expression> ExpressionRegex::optimize() { + _input = _input->optimize(); + _regex = _regex->optimize(); + if (_options) { + _options = _options->optimize(); + } + + if (ExpressionConstant::allNullOrConstant({_regex, _options})) { + _initialExecStateForConstantRegex.emplace(); + _extractRegexAndOptions( + _initialExecStateForConstantRegex.get_ptr(), + dynamic_cast<ExpressionConstant*>(_regex.get())->getValue(), + _options ? dynamic_cast<ExpressionConstant*>(_options.get())->getValue() : Value()); + _compile(_initialExecStateForConstantRegex.get_ptr()); + } + return this; +} + +void ExpressionRegex::_compile(RegexExecutionState* executionState) const { + const auto pcreOptions = - regex_util::flags2PcreOptions(executionState->options, false).all_options(); + regex_util::flagsToPcreOptions(executionState->options.value_or(""), false, _opName) + .all_options(); if (!executionState->pattern) { return; } + const char* compile_error; int eoffset; @@ -5833,7 +5833,9 @@ void RegexMatchHandler::_compile(RegexExecutionState* executionState) const { pcre_compile( executionState->pattern->c_str(), pcreOptions, &compile_error, &eoffset, nullptr), pcre_free); - uassert(51111, str::stream() << "Invalid Regex: " << compile_error, executionState->pcrePtr); + uassert(51111, + str::stream() << "Invalid Regex in " << _opName << ": " << compile_error, + executionState->pcrePtr); // Calculate the number of capture groups present in 'pattern' and store in 'numCaptures'. const int pcre_retval = pcre_fullinfo( @@ -5848,35 +5850,44 @@ void RegexMatchHandler::_compile(RegexExecutionState* executionState) const { executionState->capturesBuffer.resize((1 + executionState->numCaptures) * 3); } -void RegexMatchHandler::_extractInputField(RegexExecutionState* executionState, - const Value& textInput) const { +Value ExpressionRegex::serialize(bool explain) const { + return Value( + Document{{_opName, + Document{{"input", _input->serialize(explain)}, + {"regex", _regex->serialize(explain)}, + {"options", _options ? _options->serialize(explain) : Value()}}}}); +} + +void ExpressionRegex::_extractInputField(RegexExecutionState* executionState, + const Value& textInput) const { uassert(51104, - "'input' field should be of type string", + str::stream() << _opName << " needs 'input' to be of type string", textInput.nullish() || textInput.getType() == BSONType::String); if (textInput.getType() == BSONType::String) { executionState->input = textInput.getString(); } } -void RegexMatchHandler::_extractRegexAndOptions(RegexExecutionState* executionState, - const Value& regexPattern, - const Value& regexOptions) const { +void ExpressionRegex::_extractRegexAndOptions(RegexExecutionState* executionState, + const Value& regexPattern, + const Value& regexOptions) const { uassert(51105, - "'regex' field should be of type string or regex", + str::stream() << _opName << " needs 'regex' to be of type string or regex", regexPattern.nullish() || regexPattern.getType() == BSONType::String || regexPattern.getType() == BSONType::RegEx); uassert(51106, - "'options' should be of type string", + str::stream() << _opName << " needs 'options' to be of type string", regexOptions.nullish() || regexOptions.getType() == BSONType::String); // The 'regex' field can be a RegEx object and may have its own options... if (regexPattern.getType() == BSONType::RegEx) { StringData regexFlags = regexPattern.getRegexFlags(); executionState->pattern = regexPattern.getRegex(); - uassert( - 51107, - str::stream() << "Found regex option(s) specified in both 'regex' and 'option' fields", - regexOptions.nullish() || regexFlags.empty()); + uassert(51107, + str::stream() + << _opName + << ": found regex option(s) specified in both 'regex' and 'option' fields", + regexOptions.nullish() || regexFlags.empty()); if (!regexFlags.empty()) { executionState->options = regexFlags.toString(); } @@ -5885,45 +5896,47 @@ void RegexMatchHandler::_extractRegexAndOptions(RegexExecutionState* executionSt executionState->pattern = regexPattern.getString(); } - // If 'options' is non-null, we must extract and validate its contents even if 'regexPattern' is - // nullish. + // If 'options' is non-null, we must validate its contents even if 'regexPattern' is nullish. if (!regexOptions.nullish()) { executionState->options = regexOptions.getString(); } uassert(51109, - "Regular expression cannot contain an embedded null byte", - !executionState->pattern || executionState->pattern->find('\0', 0) == string::npos); + str::stream() << _opName << ": regular expression cannot contain an embedded null byte", + !executionState->pattern || + executionState->pattern->find('\0', 0) == std::string::npos); + uassert(51110, - "Regular expression options string cannot contain an embedded null byte", - executionState->options.find('\0', 0) == string::npos); + str::stream() << _opName + << ": regular expression options cannot contain an embedded null byte", + !executionState->options || + executionState->options->find('\0', 0) == std::string::npos); } -boost::intrusive_ptr<Expression> ExpressionRegexFind::optimize() { - _handler.optimize(vpOperand[0]); - return this; +void ExpressionRegex::_doAddDependencies(DepsTracker* deps) const { + _input->addDependencies(deps); + _regex->addDependencies(deps); + if (_options) { + _options->addDependencies(deps); + } } +/* -------------------------- ExpressionRegexFind ------------------------------ */ + Value ExpressionRegexFind::evaluate(const Document& root) const { - auto executionState = _handler.buildInitialState(vpOperand[0]->evaluate(root)); + auto executionState = buildInitialState(root); if (executionState.nullish()) { return Value(BSONNULL); } - return _handler.nextMatch(&executionState); + return nextMatch(&executionState); } REGISTER_EXPRESSION(regexFind, ExpressionRegexFind::parse); -const char* ExpressionRegexFind::getOpName() const { - return "$regexFind"; -} -boost::intrusive_ptr<Expression> ExpressionRegexFindAll::optimize() { - _handler.optimize(vpOperand[0]); - return this; -} +/* -------------------------- ExpressionRegexFindAll ------------------------------ */ Value ExpressionRegexFindAll::evaluate(const Document& root) const { std::vector<Value> output; - auto executionState = _handler.buildInitialState(vpOperand[0]->evaluate(root)); + auto executionState = buildInitialState(root); if (executionState.nullish()) { return Value(output); } @@ -5933,13 +5946,14 @@ Value ExpressionRegexFindAll::evaluate(const Document& root) const { // Using do...while loop because, when input is an empty string, we still want to see if there // is a match. do { - auto matchObj = _handler.nextMatch(&executionState); + auto matchObj = nextMatch(&executionState); if (matchObj.getType() == BSONType::jstNULL) { break; } totalDocSize += matchObj.getApproximateSize(); uassert(51151, - "The size of buffer to store $regexFindAll output exceeded the 64MB limit", + str::stream() << getOpName() + << ": the size of buffer to store output exceeded the 64MB limit", totalDocSize <= mongo::BufferMaxSize); output.push_back(matchObj); @@ -5969,24 +5983,15 @@ Value ExpressionRegexFindAll::evaluate(const Document& root) const { } REGISTER_EXPRESSION(regexFindAll, ExpressionRegexFindAll::parse); -const char* ExpressionRegexFindAll::getOpName() const { - return "$regexFindAll"; -} -boost::intrusive_ptr<Expression> ExpressionRegexMatch::optimize() { - _handler.optimize(vpOperand[0]); - return this; -} +/* -------------------------- ExpressionRegexMatch ------------------------------ */ Value ExpressionRegexMatch::evaluate(const Document& root) const { - auto executionState = _handler.buildInitialState(vpOperand[0]->evaluate(root)); + auto executionState = buildInitialState(root); // Return output of execute only if regex is not nullish. - return executionState.nullish() ? Value(false) : Value(_handler.execute(&executionState) > 0); + return executionState.nullish() ? Value(false) : Value(execute(&executionState) > 0); } REGISTER_EXPRESSION(regexMatch, ExpressionRegexMatch::parse); -const char* ExpressionRegexMatch::getOpName() const { - return "$regexMatch"; -} } // namespace mongo diff --git a/src/mongo/db/pipeline/expression.h b/src/mongo/db/pipeline/expression.h index ddd77682d36..9bf881ea241 100644 --- a/src/mongo/db/pipeline/expression.h +++ b/src/mongo/db/pipeline/expression.h @@ -2434,26 +2434,33 @@ private: boost::intrusive_ptr<Expression> _onNull; }; -class RegexMatchHandler { +class ExpressionRegex : public Expression { public: /** - * Object to hold data that is required by 'RegexMatchHandler' for calling 'execute()' or - * 'nextMatch()'. + * Object to hold data that is required when calling 'execute()' or 'nextMatch()'. */ struct RegexExecutionState { + /** + * The regex pattern, options, and captures buffer for the current execution context. + */ boost::optional<std::string> pattern; - std::string options; - int numCaptures = 0; + boost::optional<std::string> options; std::vector<int> capturesBuffer; + int numCaptures = 0; + /** - * If there is a constant regex, the underlying object of 'pcre' will be owned by - * 'RegexMatchHandler', as part of '_preExecutionState'. If not, it will be owned by - * 'RegexExecutionState'. + * If 'regex' is constant, 'pcrePtr' will be shared between the active RegexExecutionState + * and '_initialExecStateForConstantRegex'. If not, then the active RegexExecutionState is + * the sole owner. */ std::shared_ptr<pcre> pcrePtr; + + /** + * The input text and starting position for the current execution context. + */ boost::optional<std::string> input; - int startBytePos = 0; int startCodePointPos = 0; + int startBytePos = 0; /** * If either the text input or regex pattern is nullish, then we consider the operation as a @@ -2465,6 +2472,12 @@ public: }; /** + * Validates the structure of input passed in 'inputExpr'. If valid, generates an initial + * execution state. This returned object can later be used for calling execute() or nextMatch(). + */ + RegexExecutionState buildInitialState(const Document& root) const; + + /** * Checks if there is a match for the given input and pattern that are part of 'executionState'. * The method will return a positive number if there is a match and '-1' if there is no match. * Throws 'uassert()' for any errors. @@ -2482,91 +2495,121 @@ public: Value nextMatch(RegexExecutionState* executionState) const; /** - * Optimizes '$regex*' expressions. If the expression has a constant 'regex' and 'options' - * fields, then it can be optimized. Stores the optimized regex as part of '_constantRegex' so - * that it can be reused during expression evaluation. + * Optimizes '$regex*' expressions. If the expression has constant 'regex' and 'options' fields, + * then it can be optimized. Stores the optimized regex in '_initialExecStateForConstantRegex' + * so that it can be reused during expression evaluation. */ - void optimize(boost::intrusive_ptr<Expression> expression); + boost::intrusive_ptr<Expression> optimize(); - /** - * Validates the structure of input passed in 'inputExpr'. If valid, generates an initial - * execution state. This returned object can later be used for calling execute() or nextMatch(). - */ - RegexExecutionState buildInitialState(const Value& inputExpr) const; bool hasConstantRegex() const { return _initialExecStateForConstantRegex.has_value(); } + Value serialize(bool explain) const; + + const std::string& getOpName() const { + return _opName; + } + +protected: + ExpressionRegex(const boost::intrusive_ptr<ExpressionContext>& expCtx, + BSONElement expr, + const VariablesParseState& vpsIn, + const std::string& opName); + private: void _extractInputField(RegexExecutionState* executionState, const Value& textInput) const; void _extractRegexAndOptions(RegexExecutionState* executionState, const Value& regexPattern, const Value& regexOptions) const; + void _compile(RegexExecutionState* executionState) const; + + void _doAddDependencies(DepsTracker* deps) const final; + + /** + * Expressions which, when evaluated for a given document, produce the the regex pattern, the + * regex option flags, and the input text to which the regex should be applied. + */ + boost::intrusive_ptr<Expression> _input; + boost::intrusive_ptr<Expression> _regex; + boost::intrusive_ptr<Expression> _options; + /** * This variable will be set when the $regex* expressions have constant values for their 'regex' * and 'options' fields, allowing us to pre-compile the regex and re-use it across the * Expression's lifetime. */ boost::optional<RegexExecutionState> _initialExecStateForConstantRegex; + + /** + * Name of the regex expression. + */ + std::string _opName; }; -class ExpressionRegexFind final : public ExpressionFixedArity<ExpressionRegexFind, 1> { +class ExpressionRegexFind final : public ExpressionRegex { public: - explicit ExpressionRegexFind(const boost::intrusive_ptr<ExpressionContext>& expCtx) - : ExpressionFixedArity<ExpressionRegexFind, 1>(expCtx) {} + static boost::intrusive_ptr<Expression> parse( + const boost::intrusive_ptr<ExpressionContext>& expCtx, + BSONElement expr, + const VariablesParseState& vpsIn) { + return new ExpressionRegexFind(expCtx, expr, vpsIn); + } Value evaluate(const Document& root) const final; - boost::intrusive_ptr<Expression> optimize() final; - const char* getOpName() const final; void acceptVisitor(ExpressionVisitor* visitor) final { return visitor->visit(this); } - bool hasConstantRegex() const { - return _handler.hasConstantRegex(); - } private: - RegexMatchHandler _handler; + ExpressionRegexFind(const boost::intrusive_ptr<ExpressionContext>& expCtx, + BSONElement expr, + const VariablesParseState& vpsIn) + : ExpressionRegex(expCtx, expr, vpsIn, "$regexFind") {} }; -class ExpressionRegexFindAll final : public ExpressionFixedArity<ExpressionRegexFindAll, 1> { +class ExpressionRegexFindAll final : public ExpressionRegex { public: - explicit ExpressionRegexFindAll(const boost::intrusive_ptr<ExpressionContext>& expCtx) - : ExpressionFixedArity<ExpressionRegexFindAll, 1>(expCtx) {} + static boost::intrusive_ptr<Expression> parse( + const boost::intrusive_ptr<ExpressionContext>& expCtx, + BSONElement expr, + const VariablesParseState& vpsIn) { + return new ExpressionRegexFindAll(expCtx, expr, vpsIn); + } Value evaluate(const Document& root) const final; - boost::intrusive_ptr<Expression> optimize() final; - const char* getOpName() const final; void acceptVisitor(ExpressionVisitor* visitor) final { return visitor->visit(this); } - bool hasConstantRegex() const { - return _handler.hasConstantRegex(); - } private: - RegexMatchHandler _handler; + ExpressionRegexFindAll(const boost::intrusive_ptr<ExpressionContext>& expCtx, + BSONElement expr, + const VariablesParseState& vpsIn) + : ExpressionRegex(expCtx, expr, vpsIn, "$regexFindAll") {} }; -class ExpressionRegexMatch final : public ExpressionFixedArity<ExpressionRegexMatch, 1> { +class ExpressionRegexMatch final : public ExpressionRegex { public: - explicit ExpressionRegexMatch(const boost::intrusive_ptr<ExpressionContext>& expCtx) - : ExpressionFixedArity<ExpressionRegexMatch, 1>(expCtx) {} + static boost::intrusive_ptr<Expression> parse( + const boost::intrusive_ptr<ExpressionContext>& expCtx, + BSONElement expr, + const VariablesParseState& vpsIn) { + return new ExpressionRegexMatch(expCtx, expr, vpsIn); + } Value evaluate(const Document& root) const final; - boost::intrusive_ptr<Expression> optimize() final; - const char* getOpName() const final; void acceptVisitor(ExpressionVisitor* visitor) final { return visitor->visit(this); } - bool hasConstantRegex() const { - return _handler.hasConstantRegex(); - } private: - RegexMatchHandler _handler; + ExpressionRegexMatch(const boost::intrusive_ptr<ExpressionContext>& expCtx, + BSONElement expr, + const VariablesParseState& vpsIn) + : ExpressionRegex(expCtx, expr, vpsIn, "$regexMatch") {} }; } diff --git a/src/mongo/db/pipeline/expression_test.cpp b/src/mongo/db/pipeline/expression_test.cpp index c1ba68f4cd2..02bd8a55d8a 100644 --- a/src/mongo/db/pipeline/expression_test.cpp +++ b/src/mongo/db/pipeline/expression_test.cpp @@ -5958,10 +5958,10 @@ namespace ExpressionRegexTest { class ExpressionRegexTest { public: - template <typename SubClass, int N> + template <typename ExpressionRegexSubClass> static intrusive_ptr<Expression> generateOptimizedExpression(const BSONObj& input) { intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest()); - auto expression = ExpressionFixedArity<SubClass, N>::parse( + auto expression = ExpressionRegexSubClass::parse( expCtx, input.firstElement(), expCtx->variablesParseState); return expression->optimize(); } @@ -5971,7 +5971,7 @@ public: const std::vector<Value>& expectedFindAllOutput) { { // For $regexFindAll. - auto expression = generateOptimizedExpression<ExpressionRegexFindAll, 1>(input); + auto expression = generateOptimizedExpression<ExpressionRegexFindAll>(input); auto regexFindAllExpr = dynamic_cast<ExpressionRegexFindAll*>(expression.get()); ASSERT_EQ(regexFindAllExpr->hasConstantRegex(), optimized); Value output = regexFindAllExpr->evaluate(Document()); @@ -5980,7 +5980,7 @@ public: { // For $regexFind. - auto expression = generateOptimizedExpression<ExpressionRegexFind, 1>(input); + auto expression = generateOptimizedExpression<ExpressionRegexFind>(input); auto regexFindExpr = dynamic_cast<ExpressionRegexFind*>(expression.get()); ASSERT_EQ(regexFindExpr->hasConstantRegex(), optimized); Value output = regexFindExpr->evaluate(Document()); @@ -5990,7 +5990,7 @@ public: { // For $regexMatch. - auto expression = generateOptimizedExpression<ExpressionRegexMatch, 1>(input); + auto expression = generateOptimizedExpression<ExpressionRegexMatch>(input); auto regexMatchExpr = dynamic_cast<ExpressionRegexMatch*>(expression.get()); ASSERT_EQ(regexMatchExpr->hasConstantRegex(), optimized); Value output = regexMatchExpr->evaluate(Document()); @@ -6045,54 +6045,46 @@ TEST(ExpressionRegexTest, NoMatch) { fromjson("{$regexFindAll : {input: 'a1b2c3', regex: 'ab' }}"), true, {}); } -TEST(ExpressionRegexFindTest, FailureCase) { - Value input( - fromjson("{input: 'FirstLine\\nSecondLine', regex: {invalid : 'regex'} , options: 'mi'}")); - intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest()); - ExpressionRegexFind regexF(expCtx); - regexF.addOperand(ExpressionConstant::create(expCtx, input)); - ASSERT_THROWS_CODE(regexF.evaluate(Document()), DBException, 51105); -} - -TEST(ExpressionRegexFindAllTest, FailureCase) { - Value input(fromjson("{input: 'FirstLine\\nSecondLine', regex: '[0-9'}")); - intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest()); - ExpressionRegexFindAll regexF(expCtx); - regexF.addOperand(ExpressionConstant::create(expCtx, input)); - ASSERT_THROWS_CODE(regexF.evaluate(Document()), DBException, 51111); +TEST(ExpressionRegexTest, FailureCaseBadRegexType) { + ASSERT_THROWS_CODE(ExpressionRegexTest::testAllExpressions( + fromjson("{$regexFindAll : {input: 'FirstLine\\nSecondLine', regex: " + "{invalid : 'regex'} , options: 'mi'}}"), + false, + {}), + AssertionException, + 51105); } -TEST(ExpressionRegexMatchTest, FailureCase) { - Value input(fromjson("{regex: 'valid', input: {invalid : 'input'} , options: 'mi'}")); - intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest()); - ExpressionRegexMatch regexMatchExpr(expCtx); - regexMatchExpr.addOperand(ExpressionConstant::create(expCtx, input)); - ASSERT_THROWS_CODE(regexMatchExpr.evaluate(Document()), DBException, 51104); +TEST(ExpressionRegexTest, FailureCaseBadRegexPattern) { + ASSERT_THROWS_CODE( + ExpressionRegexTest::testAllExpressions( + fromjson("{$regexFindAll : {input: 'FirstLine\\nSecondLine', regex: '[0-9'}}"), + false, + {}), + AssertionException, + 51111); } -TEST(ExpressionRegexFindAllTest, InvalidUTF8InInput) { +TEST(ExpressionRegexTest, InvalidUTF8InInput) { std::string inputField = "1234 "; // Append an invalid UTF-8 character. inputField += '\xe5'; inputField += " 1234"; - Value input(fromjson("{input: '" + inputField + "', regex: '[0-9]'}")); - intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest()); - ExpressionRegexFindAll regexF(expCtx); - regexF.addOperand(ExpressionConstant::create(expCtx, input)); + BSONObj input(fromjson("{$regexFindAll: {input: '" + inputField + "', regex: '[0-9]'}}")); + // Verify that PCRE will error during execution if input is not a valid UTF-8. - ASSERT_THROWS_CODE(regexF.evaluate(Document()), DBException, 51156); + ASSERT_THROWS_CODE( + ExpressionRegexTest::testAllExpressions(input, true, {}), AssertionException, 51156); } -TEST(ExpressionRegexFindAllTest, InvalidUTF8InRegex) { +TEST(ExpressionRegexTest, InvalidUTF8InRegex) { std::string regexField = "1234 "; // Append an invalid UTF-8 character. regexField += '\xe5'; - Value input(fromjson("{input: '123456', regex: '" + regexField + "'}")); - intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest()); - ExpressionRegexFindAll regexF(expCtx); - regexF.addOperand(ExpressionConstant::create(expCtx, input)); + BSONObj input(fromjson("{$regexFindAll: {input: '123456', regex: '" + regexField + "'}}")); // Verify that PCRE will error if REGEX is not a valid UTF-8. - ASSERT_THROWS_CODE(regexF.evaluate(Document()), DBException, 51111); + ASSERT_THROWS_CODE( + ExpressionRegexTest::testAllExpressions(input, false, {}), AssertionException, 51111); } } // namespace ExpressionRegexTest |