summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArun Banala <arun.banala@10gen.com>2019-11-28 16:45:10 +0000
committerevergreen <evergreen@mongodb.com>2019-11-28 16:45:10 +0000
commit66334e9063743d14a309ab645d8c1b6b0fd95771 (patch)
tree7a6ec466ea483f79918807d83b1f8b6075b65303
parent15326566e663137685d32bca3604ebc8d6dfa687 (diff)
downloadmongo-66334e9063743d14a309ab645d8c1b6b0fd95771.tar.gz
SERVER-44617 $regexFind crash when one of the capture group doesn't match the input but pattern matches
(cherry picked from commit 294ff7c1f6a3f79342aace87be524532d222da9f)
-rw-r--r--jstests/aggregation/expressions/regex.js84
-rw-r--r--src/mongo/db/pipeline/expression.cpp42
2 files changed, 117 insertions, 9 deletions
diff --git a/jstests/aggregation/expressions/regex.js b/jstests/aggregation/expressions/regex.js
index bf128ec04cc..2dc400b0523 100644
--- a/jstests/aggregation/expressions/regex.js
+++ b/jstests/aggregation/expressions/regex.js
@@ -77,11 +77,13 @@ function testRegexAggException(inputObj, exceptionCode) {
{"match": "mp", "idx": 2, "captures": ["mp", "p"]},
{"match": "mp", "idx": 10, "captures": ["mp", "p"]}
]);
+
// Regex in json syntax, with multiple captures and matches.
testRegexFindAggForKey(0, {input: "$text", regex: /(m(p))/}, [
{"match": "mp", "idx": 2, "captures": ["mp", "p"]},
{"match": "mp", "idx": 10, "captures": ["mp", "p"]}
]);
+
// Verify no overlapping match sub-strings.
assert.commandWorked(coll.insert({_id: 112, text: "aaaaa aaaa"}));
testRegexFindAggForKey(112, {input: "$text", regex: /(aa)/}, [
@@ -94,6 +96,7 @@ function testRegexAggException(inputObj, exceptionCode) {
{"match": "aaaa", "idx": 0, "captures": ["aa"]},
{"match": "aaaa", "idx": 6, "captures": ["aa"]}
]);
+
// Verify greedy match.
testRegexFindAggForKey(112, {input: "$text", regex: /(a+)/}, [
{"match": "aaaaa", "idx": 0, "captures": ["aaaaa"]},
@@ -103,6 +106,7 @@ function testRegexAggException(inputObj, exceptionCode) {
{"match": "aaaaa", "idx": 0, "captures": ["a"]},
{"match": "aaaa", "idx": 6, "captures": ["a"]},
]);
+
// Verify lazy match.
assert.commandWorked(coll.insert({_id: 113, text: "aaa aa"}));
testRegexFindAggForKey(113, {input: "$text", regex: /(a+?)/}, [
@@ -152,6 +156,69 @@ function testRegexAggException(inputObj, exceptionCode) {
{input: "$text", regex: /^(?:1|a)\,([0-9]+)/},
[{"match": "1,2", "idx": 0, "captures": ["2"]}]);
+ assert.commandWorked(coll.insert({_id: "capture_group", text: "ABCDXYZABCDXYZABCDXYZ"}));
+
+ // Matching regex with a non-matching capture group.
+ testRegexFindAggForKey("capture_group", {input: "$text", regex: /((Z)|B)(CD)/}, [
+ {"match": "BCD", "idx": 1, "captures": ["B", null, "CD"]},
+ {"match": "BCD", "idx": 8, "captures": ["B", null, "CD"]},
+ {"match": "BCD", "idx": 15, "captures": ["B", null, "CD"]}
+ ]);
+
+ // Non-matching regex with a matching capture group.
+ testRegexFindAggForKey("capture_group", {input: "$text", regex: /EFG|(BC)E/}, []);
+ testRegexFindAggForKey("capture_group", {input: "$text", regex: /(BC)E/}, []);
+
+ // Matching regex with zero matching capture groups because of optional quantifier.
+ testRegexFindAggForKey("capture_group", {input: "$text", regex: /BC(P(DE)?)?/}, [
+ {"match": "BC", "idx": 1, "captures": [null, null]},
+ {"match": "BC", "idx": 8, "captures": [null, null]},
+ {"match": "BC", "idx": 15, "captures": [null, null]}
+ ]);
+
+ // Matching regex with non-matching capture groups because of optional quantifier.
+ testRegexFindAggForKey("capture_group", {input: "$text", regex: /(BC)(P(DE)?)?/}, [
+ {"match": "BC", "idx": 1, "captures": ["BC", null, null]},
+ {"match": "BC", "idx": 8, "captures": ["BC", null, null]},
+ {"match": "BC", "idx": 15, "captures": ["BC", null, null]}
+ ]);
+ testRegexFindAggForKey("capture_group", {input: "$text", regex: /(ABC)(D(XY)?)?Z/}, [
+ {"match": "ABCDXYZ", "idx": 0, "captures": ["ABC", "DXY", "XY"]},
+ {"match": "ABCDXYZ", "idx": 7, "captures": ["ABC", "DXY", "XY"]},
+ {"match": "ABCDXYZ", "idx": 14, "captures": ["ABC", "DXY", "XY"]}
+ ]);
+
+ // Greedy optional quantifier inside a lazy optional quantifier.
+ testRegexFindAggForKey("capture_group", {input: "$text", regex: /(ABC)(D(XY)?)??/}, [
+ {"match": "ABC", "idx": 0, "captures": ["ABC", null, null]},
+ {"match": "ABC", "idx": 7, "captures": ["ABC", null, null]},
+ {"match": "ABC", "idx": 14, "captures": ["ABC", null, null]}
+ ]);
+
+ // Lazy optional quantifier inside a greedy optional quantifier.
+ testRegexFindAggForKey("capture_group", {input: "$text", regex: /(ABC)(D(XY)??)?/}, [
+ {"match": "ABCD", "idx": 0, "captures": ["ABC", "D", null]},
+ {"match": "ABCD", "idx": 7, "captures": ["ABC", "D", null]},
+ {"match": "ABCD", "idx": 14, "captures": ["ABC", "D", null]}
+ ]);
+
+ // Everything is optional.
+ testRegexFindAggForKey("capture_group", {input: "$text", regex: /(ABC)?(XY)?/}, [
+ {"match": "ABC", "idx": 0, "captures": ["ABC", null]},
+ // Since everything is optional, every index position in the input is a match.
+ {"match": "", "idx": 3, "captures": [null, null]},
+ {"match": "XY", "idx": 4, "captures": [null, "XY"]},
+ {"match": "", "idx": 6, "captures": [null, null]},
+ {"match": "ABC", "idx": 7, "captures": ["ABC", null]},
+ {"match": "", "idx": 10, "captures": [null, null]},
+ {"match": "XY", "idx": 11, "captures": [null, "XY"]},
+ {"match": "", "idx": 13, "captures": [null, null]},
+ {"match": "ABC", "idx": 14, "captures": ["ABC", null]},
+ {"match": "", "idx": 17, "captures": [null, null]},
+ {"match": "XY", "idx": 18, "captures": [null, "XY"]},
+ {"match": "", "idx": 20, "captures": [null, null]},
+ ]);
+
// Regex quantifier.
assert.commandWorked(coll.insert({_id: 7, text: "abc12defgh345jklm"}));
testRegexFindAggForKey(
@@ -220,15 +287,23 @@ function testRegexAggException(inputObj, exceptionCode) {
{input: "$text", regex: "$pattern"},
[{"match": "Text", "idx": 5, "captures": ["Te", "e"]}]);
+ assert.commandWorked(coll.insert({_id: "Empty input", text: ""}));
+
// Empty input matches empty regex.
- testRegexFindAggForKey(0, {input: "", regex: ""}, [{"match": "", "idx": 0, "captures": []}]);
- // Empty captures groups.
+ testRegexFindAggForKey(
+ "Empty input", {input: "$text", regex: ""}, [{"match": "", "idx": 0, "captures": []}]);
+
+ // Empty capture groups.
+ testRegexFindAggForKey("Empty input",
+ {input: "$text", regex: "(missing)|()"},
+ [{"match": "", "idx": 0, "captures": [null, ""]}]);
testRegexFindAggForKey(0, {input: "bbbb", regex: "()"}, [
{"match": "", "idx": 0, "captures": [""]},
{"match": "", "idx": 1, "captures": [""]},
{"match": "", "idx": 2, "captures": [""]},
{"match": "", "idx": 3, "captures": [""]}
]);
+
// No matches.
testRegexFindAggForKey(0, {input: "$text", regex: /foo/}, []);
// Regex null.
@@ -275,8 +350,9 @@ function testRegexAggException(inputObj, exceptionCode) {
testRegexFindAggForKey(
3, {input: "$text", regex: /म/}, [{"match": "म", "idx": 0, "captures": []}]);
// Unicode with capture group.
- testRegexFindAggForKey(
- 3, {input: "$text", regex: /(गो )/}, [{"match": "गो ", "idx": 3, "captures": ["गो "]}]);
+ testRegexFindAggForKey(3,
+ {input: "$text", regex: /(गो )|(missing)/},
+ [{"match": "गो ", "idx": 3, "captures": ["गो ", null]}]);
// Test that regexes support Unicode character properties.
testRegexFindAggForKey(2, {input: "$text", regex: String.raw`\p{Hangul}`}, []);
testRegexFindAggForKey(2,
diff --git a/src/mongo/db/pipeline/expression.cpp b/src/mongo/db/pipeline/expression.cpp
index 689dcf21e84..387ff0ab1cb 100644
--- a/src/mongo/db/pipeline/expression.cpp
+++ b/src/mongo/db/pipeline/expression.cpp
@@ -5862,13 +5862,14 @@ int ExpressionRegex::execute(RegexExecutionState* regexState) const {
0, // No need to overwrite the options set during pcre_compile.
&(regexState->capturesBuffer.front()),
regexState->capturesBuffer.size());
- // The 'execResult' will be (numCaptures + 1) if there is a match, -1 if there is no match,
- // negative (other than -1) if there is an error during execution, and zero if capturesBuffer's
- // capacity is not sufficient to hold all the results. The latter scenario should never occur.
+ // The 'execResult' will be -1 if there is no match, 0 < execResult <= (numCaptures + 1)
+ // depending on how many capture groups match, negative (other than -1) if there is an error
+ // during execution, and zero if capturesBuffer's capacity is not sufficient to hold all the
+ // results. The latter scenario should never occur.
uassert(51156,
str::stream() << "Error occurred while executing the regular expression in " << _opName
<< ". Result code: " << execResult,
- execResult == -1 || execResult == (regexState->numCaptures + 1));
+ execResult == -1 || (execResult > 0 && execResult <= (regexState->numCaptures + 1)));
return execResult;
}
@@ -5884,9 +5885,35 @@ Value ExpressionRegex::nextMatch(RegexExecutionState* regexState) const {
// calls.
StringData input = *(regexState->input);
+ auto verifyBounds = [&input, this](auto startPos, auto limitPos, auto isCapture) {
+ // If a capture group was not matched, then the 'startPos' and 'limitPos' will both be -1.
+ // These bounds cannot occur for a match on the full string.
+ if (startPos == -1 || limitPos == -1) {
+ massert(31304,
+ str::stream() << "Unexpected error occurred while executing " << _opName
+ << ". startPos: " << startPos << ", limitPos: " << limitPos,
+ isCapture && startPos == -1 && limitPos == -1);
+ return;
+ }
+
+ massert(31305,
+ str::stream() << "Unexpected error occurred while executing " << _opName
+ << ". startPos: " << startPos,
+ (startPos >= 0 && static_cast<size_t>(startPos) <= input.size()));
+ massert(31306,
+ str::stream() << "Unexpected error occurred while executing " << _opName
+ << ". limitPos: " << limitPos,
+ (limitPos >= 0 && static_cast<size_t>(limitPos) <= input.size()));
+ massert(31307,
+ str::stream() << "Unexpected error occurred while executing " << _opName
+ << ". startPos: " << startPos << ", limitPos: " << limitPos,
+ startPos <= limitPos);
+ };
+
// The first and second entries of the 'capturesBuffer' will have the start and (end+1) indices
// of the matched string, as byte offsets. '(limit - startIndex)' would be the length of the
// captured string.
+ verifyBounds(regexState->capturesBuffer[0], regexState->capturesBuffer[1], false);
const int matchStartByteIndex = regexState->capturesBuffer[0];
StringData matchedStr =
input.substr(matchStartByteIndex, regexState->capturesBuffer[1] - matchStartByteIndex);
@@ -5911,7 +5938,12 @@ Value ExpressionRegex::nextMatch(RegexExecutionState* regexState) const {
for (int i = 0; i < regexState->numCaptures; ++i) {
const int start = regexState->capturesBuffer[2 * (i + 1)];
const int limit = regexState->capturesBuffer[2 * (i + 1) + 1];
- captures.push_back(Value(input.substr(start, limit - start)));
+ verifyBounds(start, limit, true);
+
+ // The 'start' and 'limit' will be set to -1, if the 'input' didn't match the current
+ // capture group. In this case we put a 'null' placeholder in place of the capture group.
+ captures.push_back(start == -1 && limit == -1 ? Value(BSONNULL)
+ : Value(input.substr(start, limit - start)));
}
MutableDocument match;