From b72e0f72f94f219fcc19654e0aa037482c0f7cee Mon Sep 17 00:00:00 2001 From: Arun Banala Date: Thu, 14 Mar 2019 17:06:06 +0000 Subject: SERVER-39694 Implement $regexMatch as syntactic sugar on top of $regexFind --- jstests/aggregation/expressions/regex.js | 56 ++++++++++++++++++++++++------- src/mongo/db/pipeline/expression.cpp | 39 +++++++++++++++------ src/mongo/db/pipeline/expression.h | 11 +++++- src/mongo/db/pipeline/expression_test.cpp | 27 ++++++++++++++- 4 files changed, 108 insertions(+), 25 deletions(-) diff --git a/jstests/aggregation/expressions/regex.js b/jstests/aggregation/expressions/regex.js index 31b04b54976..fed44601ae7 100644 --- a/jstests/aggregation/expressions/regex.js +++ b/jstests/aggregation/expressions/regex.js @@ -1,5 +1,5 @@ /* - * Tests for $regexFind and $regexFindAll aggregation expression. + * Tests for $regexFind, $regexFindAll and $regexMatch aggregation expressions. */ (function() { 'use strict'; @@ -27,35 +27,49 @@ } /** - * This function validates the output against both $regexFind and $regexFindAll expressions. + * This function validates the output against $regexFind, $regexFindAll and $regexMatch + * expressions. */ function testRegexFindAgg(inputObj, expectedOutputForFindAll) { testRegex("$regexFindAll", inputObj, expectedOutputForFindAll); - // For each of the output document, get first element from "matches" array. This will // convert 'regexFindAll' output to 'regexFind' output. const expectedOutputForFind = expectedOutputForFindAll.map( (element) => ({matches: element.matches.length == 0 ? null : element.matches[0]})); testRegex("$regexFind", inputObj, expectedOutputForFind); + + // For each of the output document, if there is at least one element in the array, then + // there is a match. + const expectedOutputForMatch = + expectedOutputForFindAll.map((element) => ({matches: element.matches.length != 0})); + testRegex("$regexMatch", inputObj, expectedOutputForMatch); } /** - * This function validates the output against both $regexFind and $regexFindAll expressions. + * This function validates the output against $regexFind, $regexFindAll and $regexMatch + * expressions. */ function testRegexFindAggForKey(key, inputObj, expectedOutputForFindAll) { testRegexForKey("$regexFindAll", key, inputObj, expectedOutputForFindAll); + const expectedOutputForFind = expectedOutputForFindAll.length == 0 ? null : expectedOutputForFindAll[0]; testRegexForKey("$regexFind", key, inputObj, expectedOutputForFind); + + const expectedOutputForMatch = expectedOutputForFindAll.length != 0; + testRegexForKey("$regexMatch", key, inputObj, expectedOutputForMatch); } /** - * This function validates the output against both $regexFind and $regexFindAll expressions. + * This function validates the output against $regexFind, $regexFindAll and $regexMatch + * expressions. */ function testRegexAggException(inputObj, exceptionCode) { assertErrorCode( coll, [{"$project": {"matches": {"$regexFindAll": inputObj}}}], exceptionCode); assertErrorCode(coll, [{"$project": {"matches": {"$regexFind": inputObj}}}], exceptionCode); + assertErrorCode( + coll, [{"$project": {"matches": {"$regexMatch": inputObj}}}], exceptionCode); } (function testWithSingleMatch() { @@ -349,6 +363,7 @@ testRegexAggException({input: "$text", regex: "valid", options: 'a'}, 51108); // 'options' are case-sensitive. testRegexAggException({input: "$text", regex: "valid", options: "I"}, 51108); + testRegexAggException({options: "I"}, 51108); // Options specified in both 'regex' and 'options'. testRegexAggException({input: "$text", regex: /(m(p))/i, options: "i"}, 51107); testRegexAggException({input: "$text", regex: /(m(p))/i, options: "x"}, 51107); @@ -399,7 +414,7 @@ const expectedResults = [{"_id": 0, "information": "Company Name"}, {"_id": 1, "information": "REDACTED"}]; // For $regexFindAll. - let result = + const resultFindAll = coll.aggregate([{ "$project": { "information": { @@ -415,24 +430,41 @@ } }]) .toArray(); - assert.eq(result, expectedResults); + assert.eq(resultFindAll, expectedResults); + // For $regexMatch. + const resultMatch = + coll.aggregate([{ + "$project": { + "information": { + "$cond": [ + {"$regexMatch": {input: "$level", regex: /public/i}}, + "$info", + "REDACTED" + ] + } + } + }]) + .toArray(); // For $regexFind. - result = + const resultFind = coll.aggregate([{ "$project": { "information": { "$cond": [ { - "$eq": + "$ne": [{"$regexFind": {input: "$level", regex: /public/i}}, null] }, - "REDACTED", - "$info" + "$info", + "REDACTED" ] } } }]) .toArray(); - assert.eq(result, expectedResults); + // Validate that {$ne : [{$regexFind: ...}, null]} produces same result as + // {$regexMatch: ...}. + assert.eq(resultFind, resultMatch); + assert.eq(resultFind, expectedResults); })(); }()); diff --git a/src/mongo/db/pipeline/expression.cpp b/src/mongo/db/pipeline/expression.cpp index f2bd565989e..629bfd847f8 100644 --- a/src/mongo/db/pipeline/expression.cpp +++ b/src/mongo/db/pipeline/expression.cpp @@ -5674,6 +5674,22 @@ public: } } + int execute(int startBytePos) { + int execResult = pcre_exec(_pcre, + 0, + &_input.front(), + _input.size(), + startBytePos, + 0, // No need to overwrite the options set during pcre_compile. + &_capturesBuffer.front(), + _capturesBuffer.size()); + // The 'execResult' will be '(_numCaptures + 1)' if there is a match, negative if there is + // no match, and zero if _capturesBuffer's capacity is not sufficient to hold all the + // results; the latter scenario should never actually occur. + invariant(execResult < 0 || execResult == _numCaptures + 1); + return execResult; + } + /** * The function will match '_input' string based on the regex pattern present in '_pcre'. If * there is a match, the function will return a 'Value' object encapsulating the matched string, @@ -5688,21 +5704,11 @@ public: // Use input as StringData throughout the function to avoid copying the string on 'substr' // calls. StringData input = _input; - int execResult = pcre_exec(_pcre, - 0, - input.rawData(), - input.size(), - *startBytePos, - 0, // No need to overwrite the options set during pcre_compile. - &_capturesBuffer.front(), - _capturesBuffer.size()); + int execResult = execute(*startBytePos); // No match. if (execResult < 0) { return Value(BSONNULL); } - // The 'execResult' will be zero if _capturesBuffer's size is not big enough to hold all - // the captures, which should never be the case. - invariant(execResult == _numCaptures + 1); // The first and second entries of the '_capturesBuffer' will have the start and limit // indices of the matched string, as byte offsets. '(limit - startIndex)' would be the @@ -5905,4 +5911,15 @@ const char* ExpressionRegexFindAll::getOpName() const { return "$regexFindAll"; } +Value ExpressionRegexMatch::evaluate(const Document& root) const { + RegexMatchHandler regex(vpOperand[0]->evaluate(root)); + // Return output of execute only if regex is not nullish. + return regex.nullish() ? Value(false) : Value(regex.execute(0) > 0); +} + +REGISTER_EXPRESSION(regexMatch, ExpressionRegexMatch::parse); +const char* ExpressionRegexMatch::getOpName() const { + return "$regexMatch"; +} + } // namespace mongo diff --git a/src/mongo/db/pipeline/expression.h b/src/mongo/db/pipeline/expression.h index b0949cca3fc..f5b7ec4fc12 100644 --- a/src/mongo/db/pipeline/expression.h +++ b/src/mongo/db/pipeline/expression.h @@ -2093,7 +2093,7 @@ private: boost::intrusive_ptr _onNull; }; -class ExpressionRegexFind : public ExpressionFixedArity { +class ExpressionRegexFind final : public ExpressionFixedArity { public: explicit ExpressionRegexFind(const boost::intrusive_ptr& expCtx) : ExpressionFixedArity(expCtx) {} @@ -2110,4 +2110,13 @@ public: Value evaluate(const Document& root) const final; const char* getOpName() const final; }; + +class ExpressionRegexMatch final : public ExpressionFixedArity { +public: + explicit ExpressionRegexMatch(const boost::intrusive_ptr& expCtx) + : ExpressionFixedArity(expCtx) {} + + Value evaluate(const Document& root) const final; + const char* getOpName() const final; +}; } diff --git a/src/mongo/db/pipeline/expression_test.cpp b/src/mongo/db/pipeline/expression_test.cpp index c9bc46a2c8b..2cdc6bb5a30 100644 --- a/src/mongo/db/pipeline/expression_test.cpp +++ b/src/mongo/db/pipeline/expression_test.cpp @@ -5975,7 +5975,6 @@ TEST(ExpressionRegexFindTest, ExtendedRegexOptions) { TEST(ExpressionRegexFindTest, FailureCase) { Value input( fromjson("{input: 'FirstLine\\nSecondLine', regex: {invalid : 'regex'} , options: 'mi'}")); - BSONObj expectedOut(fromjson("{match: 'Second', idx:10, captures:[]}")); intrusive_ptr expCtx(new ExpressionContextForTest()); ExpressionRegexFind regexF(expCtx); regexF.addOperand(ExpressionConstant::create(expCtx, input)); @@ -6036,6 +6035,32 @@ TEST(ExpressionRegexFindAllTest, InvalidUTF8InRegex) { ASSERT_THROWS_CODE(regexF.evaluate(Document()), DBException, 51111); } +TEST(ExpressionRegexMatchTest, NoMatch) { + Value input(fromjson("{input: 'asdf', regex: '^sd' }")); + Value expectedOut(false); + intrusive_ptr expCtx(new ExpressionContextForTest()); + ExpressionRegexMatch regexMatchExpr(expCtx); + regexMatchExpr.addOperand(ExpressionConstant::create(expCtx, input)); + ASSERT_VALUE_EQ(regexMatchExpr.evaluate(Document()), expectedOut); +} + +TEST(ExpressionRegexMatchTest, ExtendedRegexOptions) { + Value input(fromjson("{input: 'FirstLine\\nSecondLine', regex: '^second' , options: 'mi'}")); + Value expectedOut(true); + intrusive_ptr expCtx(new ExpressionContextForTest()); + ExpressionRegexMatch regexMatchExpr(expCtx); + regexMatchExpr.addOperand(ExpressionConstant::create(expCtx, input)); + ASSERT_VALUE_EQ(regexMatchExpr.evaluate(Document()), expectedOut); +} + +TEST(ExpressionRegexMatchTest, FailureCase) { + Value input(fromjson("{regex: 'valid', input: {invalid : 'input'} , options: 'mi'}")); + intrusive_ptr expCtx(new ExpressionContextForTest()); + ExpressionRegexMatch regexMatchExpr(expCtx); + regexMatchExpr.addOperand(ExpressionConstant::create(expCtx, input)); + ASSERT_THROWS_CODE(regexMatchExpr.evaluate(Document()), DBException, 51104); +} + } // namespace ExpressionRegexTest class All : public Suite { -- cgit v1.2.1