summary | refs | log | tree | commit | diff
path: root/jstests/aggregation/expressions
diff options
context:
space:
mode:
author Arun Banala <arun.banala@mongodb.com> 2019-03-08 20:26:09 +0000
committer Arun Banala <arun.banala@mongodb.com> 2019-03-20 15:09:26 +0000
commit 12a560bff2911a29103d05071e260060c77263eb (patch)
tree 12e3aee5e0bd4ead46f309b12d7eb598f8277df7 /jstests/aggregation/expressions
parent 38c94f316b167e4b54b54ba8d12dbec33c7c5165 (diff)
download mongo-12a560bff2911a29103d05071e260060c77263eb.tar.gz
SERVER-39696 Implement $regexFindAll
Diffstat (limited to 'jstests/aggregation/expressions')
-rw-r--r-- jstests/aggregation/expressions/regex.js 438
-rw-r--r-- jstests/aggregation/expressions/regexFind.js 339
2 files changed, 438 insertions, 339 deletions
diff --git a/jstests/aggregation/expressions/regex.js b/jstests/aggregation/expressions/regex.js
new file mode 100644
index 00000000000..31b04b54976
--- /dev/null
+++ b/jstests/aggregation/expressions/regex.js
@@ -0,0 +1,438 @@
+/*
+ * Tests for the $regexFind and $regexFindAll aggregation expressions.
+ */
+(function() {
+ 'use strict';
+ load("jstests/aggregation/extras/utils.js"); // For assertErrorCode().
+ const coll = db.regex_find_expr;
+ coll.drop();
+
+ function testRegex(expression, inputObj, expectedOutput) {
+ const result =
+ coll.aggregate([
+ {"$project": {_id: 0, "matches": {[expression]: inputObj}}},
+ {"$sort": {"matches": 1}} // Sort to ensure the documents are returned in a
+ // deterministic order for sharded clusters.
+ ])
+ .toArray();
+ assert.eq(result, expectedOutput);
+ }
+ function testRegexForKey(expression, key, inputObj, expectedMatchObj) {
+ const result =
+ coll.aggregate(
+ [{"$match": {"_id": key}}, {"$project": {"matches": {[expression]: inputObj}}}])
+ .toArray();
+ const expectedOutput = [{"_id": key, "matches": expectedMatchObj}];
+ assert.eq(result, expectedOutput);
+ }
+
+ /**
+ * This function validates the output against both $regexFind and $regexFindAll expressions.
+ */
+ function testRegexFindAgg(inputObj, expectedOutputForFindAll) {
+ testRegex("$regexFindAll", inputObj, expectedOutputForFindAll);
+
+ // For each of the output document, get first element from "matches" array. This will
+ // convert 'regexFindAll' output to 'regexFind' output.
+ const expectedOutputForFind = expectedOutputForFindAll.map(
+ (element) => ({matches: element.matches.length == 0 ? null : element.matches[0]}));
+ testRegex("$regexFind", inputObj, expectedOutputForFind);
+ }
+
+ /**
+ * This function validates the output against both $regexFind and $regexFindAll expressions.
+ */
+ function testRegexFindAggForKey(key, inputObj, expectedOutputForFindAll) {
+ testRegexForKey("$regexFindAll", key, inputObj, expectedOutputForFindAll);
+ const expectedOutputForFind =
+ expectedOutputForFindAll.length == 0 ? null : expectedOutputForFindAll[0];
+ testRegexForKey("$regexFind", key, inputObj, expectedOutputForFind);
+ }
+
+ /**
+ * This function validates the output against both $regexFind and $regexFindAll expressions.
+ */
+ function testRegexAggException(inputObj, exceptionCode) {
+ assertErrorCode(
+ coll, [{"$project": {"matches": {"$regexFindAll": inputObj}}}], exceptionCode);
+ assertErrorCode(coll, [{"$project": {"matches": {"$regexFind": inputObj}}}], exceptionCode);
+ }
+
+ (function testWithSingleMatch() {
+ // Regex in string notation, find with multiple captures and matches.
+ assert.commandWorked(coll.insert({_id: 0, text: "Simple Example "}));
+ testRegexFindAggForKey(0, {input: "$text", regex: "(m(p))"}, [
+ {"match": "mp", "idx": 2, "captures": ["mp", "p"]},
+ {"match": "mp", "idx": 10, "captures": ["mp", "p"]}
+ ]);
+ // Regex as a regex literal, with multiple captures and matches.
+ testRegexFindAggForKey(0, {input: "$text", regex: /(m(p))/}, [
+ {"match": "mp", "idx": 2, "captures": ["mp", "p"]},
+ {"match": "mp", "idx": 10, "captures": ["mp", "p"]}
+ ]);
+ // Verify no overlapping match sub-strings.
+ assert.commandWorked(coll.insert({_id: 112, text: "aaaaa aaaa"}));
+ testRegexFindAggForKey(112, {input: "$text", regex: /(aa)/}, [
+ {"match": "aa", "idx": 0, "captures": ["aa"]},
+ {"match": "aa", "idx": 2, "captures": ["aa"]},
+ {"match": "aa", "idx": 6, "captures": ["aa"]},
+ {"match": "aa", "idx": 8, "captures": ["aa"]}
+ ]);
+ testRegexFindAggForKey(112, {input: "$text", regex: /(aa)+/}, [
+ {"match": "aaaa", "idx": 0, "captures": ["aa"]},
+ {"match": "aaaa", "idx": 6, "captures": ["aa"]}
+ ]);
+ // Verify greedy match.
+ testRegexFindAggForKey(112, {input: "$text", regex: /(a+)/}, [
+ {"match": "aaaaa", "idx": 0, "captures": ["aaaaa"]},
+ {"match": "aaaa", "idx": 6, "captures": ["aaaa"]},
+ ]);
+ testRegexFindAggForKey(112, {input: "$text", regex: /(a)+/}, [
+ {"match": "aaaaa", "idx": 0, "captures": ["a"]},
+ {"match": "aaaa", "idx": 6, "captures": ["a"]},
+ ]);
+ // Verify lazy match.
+ assert.commandWorked(coll.insert({_id: 113, text: "aaa aa"}));
+ testRegexFindAggForKey(113, {input: "$text", regex: /(a+?)/}, [
+ {"match": "a", "idx": 0, "captures": ["a"]},
+ {"match": "a", "idx": 1, "captures": ["a"]},
+ {"match": "a", "idx": 2, "captures": ["a"]},
+ {"match": "a", "idx": 4, "captures": ["a"]},
+ {"match": "a", "idx": 5, "captures": ["a"]}
+ ]);
+ testRegexFindAggForKey(113, {input: "$text", regex: /(a*?)/}, [
+ {"match": "", "idx": 0, "captures": [""]},
+ {"match": "", "idx": 1, "captures": [""]},
+ {"match": "", "idx": 2, "captures": [""]},
+ {"match": "", "idx": 3, "captures": [""]},
+ {"match": "", "idx": 4, "captures": [""]},
+ {"match": "", "idx": 5, "captures": [""]}
+ ]);
+
+ // Regex string with groups nested within a group.
+ testRegexFindAggForKey(
+ 0,
+ {input: "$text", regex: "((S)(i)(m)(p)(l)(e))"},
+ [{"match": "Simple", "idx": 0, "captures": ["Simple", "S", "i", "m", "p", "l", "e"]}]);
+ testRegexFindAggForKey(
+ 0,
+ {input: "$text", regex: "(S)(i)(m)((p)(l)(e))"},
+ [{"match": "Simple", "idx": 0, "captures": ["S", "i", "m", "ple", "p", "l", "e"]}]);
+
+ // Regex email pattern.
+ assert.commandWorked(
+ coll.insert({_id: 1, text: "Some field text with email mongo@mongodb.com"}));
+ testRegexFindAggForKey(
+ 1,
+ {input: "$text", regex: "([a-zA-Z0-9._-]+)@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+"},
+ [{"match": "mongo@mongodb.com", "idx": 27, "captures": ["mongo"]}]);
+
+ // Regex digits.
+ assert.commandWorked(coll.insert({_id: 5, text: "Text with 02 digits"}));
+ testRegexFindAggForKey(
+ 5, {input: "$text", regex: /[0-9]+/}, [{"match": "02", "idx": 10, "captures": []}]);
+ testRegexFindAggForKey(
+ 5, {input: "$text", regex: /(\d+)/}, [{"match": "02", "idx": 10, "captures": ["02"]}]);
+
+ // Regex with a non-capturing group.
+ assert.commandWorked(coll.insert({_id: 6, text: "1,2,3,4,5,6,7,8,9,10"}));
+ testRegexFindAggForKey(6,
+ {input: "$text", regex: /^(?:1|a)\,([0-9]+)/},
+ [{"match": "1,2", "idx": 0, "captures": ["2"]}]);
+
+ // Regex quantifier.
+ assert.commandWorked(coll.insert({_id: 7, text: "abc12defgh345jklm"}));
+ testRegexFindAggForKey(
+ 7, {input: "$text", regex: /[0-9]{3}/}, [{"match": "345", "idx": 10, "captures": []}]);
+
+ // Regex case insensitive option.
+ assert.commandWorked(coll.insert({_id: 8, text: "This Is Camel Case"}));
+ testRegexFindAggForKey(8, {input: "$text", regex: /camel/}, []);
+ testRegexFindAggForKey(
+ 8, {input: "$text", regex: /camel/i}, [{"match": "Camel", "idx": 8, "captures": []}]);
+ testRegexFindAggForKey(8,
+ {input: "$text", regex: /camel/, options: "i"},
+ [{"match": "Camel", "idx": 8, "captures": []}]);
+ testRegexFindAggForKey(8,
+ {input: "$text", regex: "camel", options: "i"},
+ [{"match": "Camel", "idx": 8, "captures": []}]);
+
+ // Regex multi line option.
+ assert.commandWorked(coll.insert({_id: 9, text: "Foo line1\nFoo line2\nFoo line3"}));
+ // Verify no match with options flag off.
+ testRegexFindAggForKey(9, {input: "$text", regex: /^Foo line\d$/}, []);
+ // Verify match when flag is on.
+ testRegexFindAggForKey(9, {input: "$text", regex: /(^Foo line\d$)/m}, [
+ {"match": "Foo line1", "idx": 0, "captures": ["Foo line1"]},
+ {"match": "Foo line2", "idx": 10, "captures": ["Foo line2"]},
+ {"match": "Foo line3", "idx": 20, "captures": ["Foo line3"]}
+ ]);
+
+ // Regex single line option.
+ testRegexFindAggForKey(9, {input: "$text", regex: "Foo.*line"}, [
+ {"match": "Foo line", "idx": 0, "captures": []},
+ {"match": "Foo line", "idx": 10, "captures": []},
+ {"match": "Foo line", "idx": 20, "captures": []}
+ ]);
+ testRegexFindAggForKey(
+ 9,
+ {input: "$text", regex: "Foo.*line", options: "s"},
+ [{"match": "Foo line1\nFoo line2\nFoo line", "idx": 0, "captures": []}]);
+
+ // Regex extended option.
+ testRegexFindAggForKey(9, {input: "$text", regex: "F o o # a comment"}, []);
+ testRegexFindAggForKey(9, {input: "$text", regex: "F o o # a comment", options: "x"}, [
+ {"match": "Foo", "idx": 0, "captures": []},
+ {"match": "Foo", "idx": 10, "captures": []},
+ {"match": "Foo", "idx": 20, "captures": []}
+ ]);
+ testRegexFindAggForKey(
+ 9, {input: "$text", regex: "F o o # a comment \n\n# ignored", options: "x"}, [
+ {"match": "Foo", "idx": 0, "captures": []},
+ {"match": "Foo", "idx": 10, "captures": []},
+ {"match": "Foo", "idx": 20, "captures": []}
+ ]);
+ testRegexFindAggForKey(9, {input: "$text", regex: "(F o o) # a comment", options: "x"}, [
+ {"match": "Foo", "idx": 0, "captures": ["Foo"]},
+ {"match": "Foo", "idx": 10, "captures": ["Foo"]},
+ {"match": "Foo", "idx": 20, "captures": ["Foo"]}
+ ]);
+
+ // Regex pattern from a document field value.
+ assert.commandWorked(
+ coll.insert({_id: 10, text: "Simple Value Example", pattern: "(m(p))"}));
+ testRegexFindAggForKey(10, {input: "$text", regex: "$pattern"}, [
+ {"match": "mp", "idx": 2, "captures": ["mp", "p"]},
+ {"match": "mp", "idx": 16, "captures": ["mp", "p"]}
+ ]);
+ assert.commandWorked(coll.insert({_id: 11, text: "OtherText", pattern: /(T(e))xt$/}));
+ testRegexFindAggForKey(11,
+ {input: "$text", regex: "$pattern"},
+ [{"match": "Text", "idx": 5, "captures": ["Te", "e"]}]);
+
+ // Empty input matches empty regex.
+ testRegexFindAggForKey(
+ 0, {input: "", regex: ""}, [{"match": "", "idx": 0, "captures": []}]);
+ // Empty capture groups.
+ testRegexFindAggForKey(0, {input: "bbbb", regex: "()"}, [
+ {"match": "", "idx": 0, "captures": [""]},
+ {"match": "", "idx": 1, "captures": [""]},
+ {"match": "", "idx": 2, "captures": [""]},
+ {"match": "", "idx": 3, "captures": [""]}
+ ]);
+ // No matches.
+ testRegexFindAggForKey(0, {input: "$text", regex: /foo/}, []);
+ // Regex null.
+ testRegexFindAggForKey(0, {input: "$text", regex: null}, []);
+ // Regex not present.
+ testRegexFindAggForKey(0, {input: "$text"}, []);
+ // Input not present.
+ testRegexFindAggForKey(0, {regex: /valid/}, []);
+ // Input null.
+ testRegexFindAggForKey(0, {input: null, regex: /valid/}, []);
+ // Empty object.
+ testRegexFindAggForKey(0, {}, []);
+ })();
+
+ (function testWithStartOptions() {
+ coll.drop();
+ assert.commandWorked(coll.insert({_id: 2, text: "cafétéria"}));
+ assert.commandWorked(coll.insert({_id: 3, text: "ab\ncd"}));
+
+ // LIMIT_MATCH option to limit the number of comparisons PCRE does internally.
+ testRegexFindAggForKey(2, {input: "$text", regex: "(*LIMIT_MATCH=1)fé"}, []);
+ testRegexFindAggForKey(2,
+ {input: "$text", regex: "(*LIMIT_MATCH=3)(fé)"},
+ [{"match": "fé", "idx": 2, "captures": ["fé"]}]);
+
+ // (*LF) sets the newline convention to a single linefeed (Unix-like) and (*CR) to a single
+ // carriage return, so '.' matches '\n' under (*CR) but not under (*LF).
+ testRegexFindAggForKey(3, {input: "$text", regex: "(*LF)ab.cd"}, []);
+ testRegexFindAggForKey(3,
+ {input: "$text", regex: "(*CR)ab.cd"},
+ [{"match": "ab\ncd", "idx": 0, "captures": []}]);
+
+ // Multiple start options.
+ testRegexFindAggForKey(2,
+ {input: "$text", regex: String.raw `(*LIMIT_MATCH=5)(*UCP)^(\w+)`},
+ [{"match": "cafétéria", "idx": 0, "captures": ["cafétéria"]}]);
+ testRegexFindAggForKey(
+ 2, {input: "$text", regex: String.raw `(*LIMIT_MATCH=1)(*UCP)^(\w+)`}, []);
+ })();
+
+ (function testWithUnicodeData() {
+ coll.drop();
+ // Unicode index counting.
+ assert.commandWorked(coll.insert({_id: 2, text: "cafétéria"}));
+ assert.commandWorked(coll.insert({_id: 3, text: "मा०गो डीबि"}));
+ testRegexFindAggForKey(
+ 2, {input: "$text", regex: "té"}, [{"match": "té", "idx": 4, "captures": []}]);
+ testRegexFindAggForKey(
+ 3, {input: "$text", regex: /म/}, [{"match": "म", "idx": 0, "captures": []}]);
+ // Unicode with capture group.
+ testRegexFindAggForKey(3,
+ {input: "$text", regex: /(गो )/},
+ [{"match": "गो ", "idx": 3, "captures": ["गो "]}]);
+ // Test that regexes support Unicode character properties.
+ testRegexFindAggForKey(2, {input: "$text", regex: String.raw `\p{Hangul}`}, []);
+ testRegexFindAggForKey(2,
+ {input: "$text", regex: String.raw `\p{Latin}+$`},
+ [{"match": "cafétéria", "idx": 0, "captures": []}]);
+ // Test that the (*UTF) and (*UTF8) options are accepted for unicode characters.
+ assert.commandWorked(coll.insert({_id: 12, text: "༢༣༤༤༤༥12༥A"}));
+ testRegexFindAggForKey(12, {input: "$text", regex: "(*UTF8)༤"}, [
+ {"match": "༤", "idx": 2, "captures": []},
+ {"match": "༤", "idx": 3, "captures": []},
+ {"match": "༤", "idx": 4, "captures": []}
+ ]);
+ testRegexFindAggForKey(12, {input: "$text", regex: "(*UTF)༤"}, [
+ {"match": "༤", "idx": 2, "captures": []},
+ {"match": "༤", "idx": 3, "captures": []},
+ {"match": "༤", "idx": 4, "captures": []}
+ ]);
+ // For ASCII characters.
+ assert.commandWorked(coll.insert({_id: 4, text: "123444"}));
+ testRegexFindAggForKey(4,
+ {input: "$text", regex: "(*UTF8)(44)"},
+ [{"match": "44", "idx": 3, "captures": ["44"]}]);
+ testRegexFindAggForKey(4,
+ {input: "$text", regex: "(*UTF)(44)"},
+ [{"match": "44", "idx": 3, "captures": ["44"]}]);
+
+ // When the (*UCP) option is specified, Unicode "word" characters are included in the '\w'
+ // character type.
+ testRegexFindAggForKey(12,
+ {input: "$text", regex: String.raw `(*UCP)^(\w+)`},
+ [{"match": "༢༣༤༤༤༥12༥A", "idx": 0, "captures": ["༢༣༤༤༤༥12༥A"]}]);
+ // When the (*UCP) option is specified, [:digit:] becomes \p{Nd} and matches all Unicode
+ // decimal digit characters.
+ testRegexFindAggForKey(12,
+ {input: "$text", regex: "(*UCP)^[[:digit:]]+"},
+ [{"match": "༢༣༤༤༤༥12༥", "idx": 0, "captures": []}]);
+ testRegexFindAggForKey(12, {input: "$text", regex: "(*UCP)[[:digit:]]+$"}, []);
+ // When the (*UCP) option is specified, [:alpha:] becomes \p{L} and matches all Unicode
+ // alphabetic characters.
+ assert.commandWorked(coll.insert({_id: 13, text: "박정수AB"}));
+ testRegexFindAggForKey(13,
+ {input: "$text", regex: String.raw `(*UCP)^[[:alpha:]]+`},
+ [{"match": "박정수AB", "idx": 0, "captures": []}]);
+
+ // No match when options are not set.
+ testRegexFindAggForKey(12, {input: "$text", regex: String.raw `^(\w+)`}, []);
+ testRegexFindAggForKey(12, {input: "$text", regex: "^[[:digit:]]"}, []);
+ testRegexFindAggForKey(2, {input: "$text", regex: "^[[:alpha:]]+$"}, []);
+ })();
+
+ (function testErrors() {
+ coll.drop();
+ assert.commandWorked(coll.insert({text: "string"}));
+ // Null object.
+ testRegexAggException(null, 51103);
+ // Incorrect object parameter.
+ testRegexAggException("incorrect type", 51103);
+ // Test malformed regex.
+ testRegexAggException({input: "$text", regex: "[0-9"}, 51111);
+ testRegexAggException({regex: "[a-c"}, 51111);
+ // Malformed regex because start options not at the beginning.
+ testRegexAggException({input: "$text", regex: "^(*UCP)[[:alpha:]]+$"}, 51111);
+ testRegexAggException({input: "$text", regex: "((*UCP)[[:alpha:]]+$)"}, 51111);
+ // At least one of the 'input' field values is not a string.
+ assert.commandWorked(coll.insert({a: "string"}));
+ assert.commandWorked(coll.insert({a: {b: "object"}}));
+ testRegexAggException({input: "$a", regex: "valid"}, 51104);
+ testRegexAggException({input: "$a"}, 51104);
+ // 'regex' field is neither a string nor a regex.
+ testRegexAggException({input: "$text", regex: ["incorrect"]}, 51105);
+ // 'options' field is not a string.
+ testRegexAggException({input: "$text", regex: "valid", options: 123}, 51106);
+ // Incorrect 'options' flag.
+ testRegexAggException({input: "$text", regex: "valid", options: 'a'}, 51108);
+ // 'options' are case-sensitive.
+ testRegexAggException({input: "$text", regex: "valid", options: "I"}, 51108);
+ // Options specified in both 'regex' and 'options'.
+ testRegexAggException({input: "$text", regex: /(m(p))/i, options: "i"}, 51107);
+ testRegexAggException({input: "$text", regex: /(m(p))/i, options: "x"}, 51107);
+ testRegexAggException({input: "$text", regex: /(m(p))/m, options: ""}, 51107);
+ // 'regex' as string with null characters.
+ testRegexAggException({input: "$text", regex: "sasd\0", options: "i"}, 51109);
+ testRegexAggException({regex: "sa\x00sd", options: "i"}, 51109);
+ // 'options' as string with null characters.
+ testRegexAggException({input: "$text", regex: /(m(p))/, options: "i\0"}, 51110);
+ testRegexAggException({input: "$text", options: "i\x00"}, 51110);
+ })();
+
+ (function testMultipleMatches() {
+ coll.drop();
+ assert.commandWorked(coll.insert({a: "string1string2"}));
+ assert.commandWorked(coll.insert({a: "string3 string4"}));
+ // Both match.
+ testRegexFindAgg({input: "$a", regex: "(str.*?[0-9])"}, [
+ {
+ "matches": [
+ {"match": "string1", "idx": 0, "captures": ["string1"]},
+ {"match": "string2", "idx": 7, "captures": ["string2"]}
+ ]
+ },
+ {
+ "matches": [
+ {"match": "string3", "idx": 0, "captures": ["string3"]},
+ {"match": "string4", "idx": 8, "captures": ["string4"]}
+ ]
+ }
+ ]);
+ // Only one match.
+ testRegexFindAgg({input: "$a", regex: "(^.*[0-2]$)"}, [
+ {"matches": []},
+ {"matches": [{"match": "string1string2", "idx": 0, "captures": ["string1string2"]}]}
+
+ ]);
+ // None match.
+ testRegexFindAgg({input: "$a", regex: "(^.*[5-9]$)"}, [{"matches": []}, {"matches": []}]);
+ })();
+
+ (function testInsideCondOperator() {
+ coll.drop();
+ assert.commandWorked(
+ coll.insert({_id: 0, level: "Public Knowledge", info: "Company Name"}));
+ assert.commandWorked(
+ coll.insert({_id: 1, level: "Private Information", info: "Company Secret"}));
+ const expectedResults =
+ [{"_id": 0, "information": "Company Name"}, {"_id": 1, "information": "REDACTED"}];
+ // For $regexFindAll.
+ let result =
+ coll.aggregate([{
+ "$project": {
+ "information": {
+ "$cond": [
+ {
+ "$eq":
+ [{"$regexFindAll": {input: "$level", regex: /public/i}}, []]
+ },
+ "REDACTED",
+ "$info"
+ ]
+ }
+ }
+ }])
+ .toArray();
+ assert.eq(result, expectedResults);
+ // For $regexFind.
+ result =
+ coll.aggregate([{
+ "$project": {
+ "information": {
+ "$cond": [
+ {
+ "$eq":
+ [{"$regexFind": {input: "$level", regex: /public/i}}, null]
+ },
+ "REDACTED",
+ "$info"
+ ]
+ }
+ }
+ }])
+ .toArray();
+ assert.eq(result, expectedResults);
+ })();
+}());
diff --git a/jstests/aggregation/expressions/regexFind.js b/jstests/aggregation/expressions/regexFind.js
deleted file mode 100644
index e5743a453c5..00000000000
--- a/jstests/aggregation/expressions/regexFind.js
+++ /dev/null
@@ -1,339 +0,0 @@
-/*
- * Tests for $regexFind aggregation expression.
- */
-(function() {
- 'use strict';
-
- load("jstests/aggregation/extras/utils.js"); // For assertErrorCode().
-
- const coll = db.regex_find_expr;
- coll.drop();
-
- function testRegexFindAgg(regexFind, expectedOutput) {
- const result =
- coll.aggregate([
- {"$project": {_id: 0, "matches": {"$regexFind": regexFind}}},
- {"$sort": {"matches": 1}} // Ensure that the documents are returned in a
- // deterministic order for sharded clusters.
- ])
- .toArray();
- assert.eq(result, expectedOutput);
- }
- function testRegexFindAggForKey(key, regexFind, expectedMatchObj) {
- const result = coll.aggregate([
- {"$match": {"_id": key}},
- {"$project": {"matches": {"$regexFind": regexFind}}}
- ])
- .toArray();
- const expectedOutput = [{"_id": key, "matches": expectedMatchObj}];
- assert.eq(result, expectedOutput);
- }
- function testRegexFindAggException(regexFind, exceptionCode) {
- assertErrorCode(
- coll, [{"$project": {"matches": {"$regexFind": regexFind}}}], exceptionCode);
- }
-
- (function testWithSingleMatch() {
- // Regex in string notation, find with multiple captures.
- assert.commandWorked(coll.insert({_id: 0, text: "Simple Example"}));
- testRegexFindAggForKey(0,
- {input: "$text", regex: "(m(p))"},
- {"match": "mp", "idx": 2, "captures": ["mp", "p"]});
- // Regex in json syntax, with multiple captures.
- testRegexFindAggForKey(0, {input: "$text", regex: /(S)(i)(m)(p)(l)(e) (Ex)(am)(p)(le)/}, {
- "match": "Simple Example",
- "idx": 0,
- "captures": ["S", "i", "m", "p", "l", "e", "Ex", "am", "p", "le"]
- });
-
- // Regex string groups within group.
- testRegexFindAggForKey(
- 0,
- {input: "$text", regex: "((S)(i)(m)(p)(l)(e))"},
- {"match": "Simple", "idx": 0, "captures": ["Simple", "S", "i", "m", "p", "l", "e"]});
- testRegexFindAggForKey(
- 0,
- {input: "$text", regex: "(S)(i)(m)((p)(l)(e))"},
- {"match": "Simple", "idx": 0, "captures": ["S", "i", "m", "ple", "p", "l", "e"]});
-
- // Regex email pattern.
- assert.commandWorked(
- coll.insert({_id: 1, text: "Some field text with email mongo@mongodb.com"}));
- testRegexFindAggForKey(
- 1,
- {input: "$text", regex: "([a-zA-Z0-9._-]+)@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+"},
- {"match": "mongo@mongodb.com", "idx": 27, "captures": ["mongo"]});
-
- // Regex digits.
- assert.commandWorked(coll.insert({_id: 5, text: "Text with 02 digits"}));
- testRegexFindAggForKey(
- 5, {input: "$text", regex: /[0-9]+/}, {"match": "02", "idx": 10, "captures": []});
- testRegexFindAggForKey(
- 5, {input: "$text", regex: /(\d+)/}, {"match": "02", "idx": 10, "captures": ["02"]});
-
- // Regex a non-capture group.
- assert.commandWorked(coll.insert({_id: 6, text: "1,2,3,4,5,6,7,8,9,10"}));
- testRegexFindAggForKey(6,
- {input: "$text", regex: /^(?:1|a)\,([0-9]+)/},
- {"match": "1,2", "idx": 0, "captures": ["2"]});
-
- // Regex quantifier.
- assert.commandWorked(coll.insert({_id: 7, text: "abc12defgh345jklm"}));
- testRegexFindAggForKey(
- 7, {input: "$text", regex: /[0-9]{3}/}, {"match": "345", "idx": 10, "captures": []});
-
- // Regex case insensitive option.
- assert.commandWorked(coll.insert({_id: 8, text: "This Is Camel Case"}));
- testRegexFindAggForKey(8, {input: "$text", regex: /camel/}, null);
- testRegexFindAggForKey(
- 8, {input: "$text", regex: /camel/i}, {"match": "Camel", "idx": 8, "captures": []});
- testRegexFindAggForKey(8,
- {input: "$text", regex: /camel/, options: "i"},
- {"match": "Camel", "idx": 8, "captures": []});
- testRegexFindAggForKey(8,
- {input: "$text", regex: "camel", options: "i"},
- {"match": "Camel", "idx": 8, "captures": []});
-
- // Regex multi line option.
- assert.commandWorked(coll.insert({_id: 9, text: "Foo line1\nFoo line2\nFoo line3"}));
- // Verify no match with options flag off.
- testRegexFindAggForKey(9, {input: "$text", regex: /^Foo line\d$/}, null);
- // Verify match when flag is on.
- testRegexFindAggForKey(9,
- {input: "$text", regex: /^Foo line\d$/m},
- {"match": "Foo line1", "idx": 0, "captures": []});
-
- // Regex single line option.
- testRegexFindAggForKey(9,
- {input: "$text", regex: "Foo.*line"},
- {"match": "Foo line", "idx": 0, "captures": []});
- testRegexFindAggForKey(
- 9,
- {input: "$text", regex: "Foo.*line", options: "s"},
- {"match": "Foo line1\nFoo line2\nFoo line", "idx": 0, "captures": []});
-
- // Regex extended option.
- testRegexFindAggForKey(9, {input: "$text", regex: "F o o # a comment"}, null);
- testRegexFindAggForKey(9,
- {input: "$text", regex: "F o o # a comment", options: "x"},
- {"match": "Foo", "idx": 0, "captures": []});
- testRegexFindAggForKey(
- 9,
- {input: "$text", regex: "F o o # a comment \n\n# ignored", options: "x"},
- {"match": "Foo", "idx": 0, "captures": []});
- testRegexFindAggForKey(9,
- {input: "$text", regex: "(F o o) # a comment", options: "x"},
- {"match": "Foo", "idx": 0, "captures": ["Foo"]});
-
- // Regex pattern from a document field value.
- assert.commandWorked(coll.insert({_id: 10, text: "Simple Value", pattern: "(m(p))"}));
- testRegexFindAggForKey(10,
- {input: "$text", regex: "$pattern"},
- {"match": "mp", "idx": 2, "captures": ["mp", "p"]});
- assert.commandWorked(coll.insert({_id: 11, text: "OtherText", pattern: /(T(e))xt$/}));
- testRegexFindAggForKey(11,
- {input: "$text", regex: "$pattern"},
- {"match": "Text", "idx": 5, "captures": ["Te", "e"]});
-
- // 'regex' as object with null characters.
- assert.commandWorked(coll.insert({_id: 12, text: "Null\0 charac\0ters"}));
- testRegexFindAggForKey(12, {input: "$text", regex: /((Null)(\0))( )(charac\0t)/}, {
- "match": "Null\0 charac\0t",
- "idx": 0,
- "captures": ["Null\0", "Null", "\0", " ", "charac\0t"]
- });
- testRegexFindAggForKey(
- 12,
- {input: "$text", regex: /(\x00)( )(charac\x00t)/},
- {"match": "\0 charac\x00t", "idx": 4, "captures": ["\x00", " ", "charac\0t"]});
- // 'regex' as string with escaped null characters.
- testRegexFindAggForKey(12,
- {input: "$text", regex: "l\\0 charac\\0ter.*$"},
- {"match": "l\0 charac\0ters", "idx": 3, "captures": []});
- // No match with null characters in input.
- testRegexFindAggForKey(12, {input: "$text", regex: /Null c/}, null);
- // No match with null characters in regex.
- testRegexFindAggForKey(12, {input: "$text", regex: /Nul\0l/}, null);
-
- // No matches.
- testRegexFindAggForKey(0, {input: "$text", regex: /foo/}, null);
- // Regex null.
- testRegexFindAggForKey(0, {input: "$text", regex: null}, null);
- // Regex not present.
- testRegexFindAggForKey(0, {input: "$text"}, null);
- // Input not present.
- testRegexFindAggForKey(0, {regex: /valid/}, null);
- // Input null.
- testRegexFindAggForKey(0, {input: null, regex: /valid/}, null);
- // Empty object.
- testRegexFindAggForKey(0, {}, null);
- })();
-
- (function testWithStartOptions() {
- coll.drop();
- assert.commandWorked(coll.insert({_id: 2, text: "cafétéria"}));
- assert.commandWorked(coll.insert({_id: 3, text: "ab\ncd"}));
-
- // LIMIT_MATCH option to limit the number of comparisons PCRE does internally.
- testRegexFindAggForKey(2, {input: "$text", regex: "(*LIMIT_MATCH=1)fé"}, null);
- testRegexFindAggForKey(2,
- {input: "$text", regex: "(*LIMIT_MATCH=3)(fé)"},
- {"match": "fé", "idx": 2, "captures": ["fé"]});
-
- // (*LF) will change the feed system to UNIX like and (*CR) to windows like. So '\n' would
- // match '.' with CR but not LF.
- testRegexFindAggForKey(3, {input: "$text", regex: "(*LF)ab.cd"}, null);
- testRegexFindAggForKey(3,
- {input: "$text", regex: "(*CR)ab.cd"},
- {"match": "ab\ncd", "idx": 0, "captures": []});
-
- // Multiple start options.
- testRegexFindAggForKey(2,
- {input: "$text", regex: String.raw `(*LIMIT_MATCH=5)(*UCP)^(\w+)`},
- {"match": "cafétéria", "idx": 0, "captures": ["cafétéria"]});
- testRegexFindAggForKey(
- 2, {input: "$text", regex: String.raw `(*LIMIT_MATCH=1)(*UCP)^(\w+)`}, null);
- })();
-
- (function testWithUnicodeData() {
- coll.drop();
- // Unicode index counting.
- assert.commandWorked(coll.insert({_id: 2, text: "cafétéria"}));
- assert.commandWorked(coll.insert({_id: 3, text: "मा०गो डीबि"}));
- testRegexFindAggForKey(
- 2, {input: "$text", regex: "té"}, {"match": "té", "idx": 4, "captures": []});
- testRegexFindAggForKey(
- 3, {input: "$text", regex: /म/}, {"match": "म", "idx": 0, "captures": []});
- // Unicode with capture group.
- testRegexFindAggForKey(3,
- {input: "$text", regex: /(गो )/},
- {"match": "गो ", "idx": 3, "captures": ["गो "]});
- // Test that regexes support Unicode character properties.
- testRegexFindAggForKey(2, {input: "$text", regex: String.raw `\p{Hangul}`}, null);
- testRegexFindAggForKey(2,
- {input: "$text", regex: String.raw `\p{Latin}+$`},
- {"match": "cafétéria", "idx": 0, "captures": []});
- // Test that the (*UTF) and (*UTF8) options are accepted for unicode characters.
- assert.commandWorked(coll.insert({_id: 12, text: "༢༣༤༤༤༥12༥A"}));
- testRegexFindAggForKey(
- 12, {input: "$text", regex: "(*UTF8)༤"}, {"match": "༤", "idx": 2, "captures": []});
- testRegexFindAggForKey(
- 12, {input: "$text", regex: "(*UTF)༤"}, {"match": "༤", "idx": 2, "captures": []});
- // For ASCII characters.
- assert.commandWorked(coll.insert({_id: 4, text: "123444"}));
- testRegexFindAggForKey(4,
- {input: "$text", regex: "(*UTF8)(44)"},
- {"match": "44", "idx": 3, "captures": ["44"]});
- testRegexFindAggForKey(4,
- {input: "$text", regex: "(*UTF)(44)"},
- {"match": "44", "idx": 3, "captures": ["44"]});
-
- // When the (*UCP) option is specified, Unicode "word" characters are included in the '\w'
- // character type.
- testRegexFindAggForKey(12,
- {input: "$text", regex: String.raw `(*UCP)^(\w+)`},
- {"match": "༢༣༤༤༤༥12༥A", "idx": 0, "captures": ["༢༣༤༤༤༥12༥A"]});
- // When the (*UCP) option is specified, [:digit:] becomes \p{N} and matches all Unicode
- // decimal digit characters.
- testRegexFindAggForKey(12,
- {input: "$text", regex: "(*UCP)^[[:digit:]]+"},
- {"match": "༢༣༤༤༤༥12༥", "idx": 0, "captures": []});
- testRegexFindAggForKey(12, {input: "$text", regex: "(*UCP)[[:digit:]]+$"}, null);
- // When the (*UCP) option is specified, [:alpha:] becomes \p{L} and matches all Unicode
- // alphabetic characters.
- assert.commandWorked(coll.insert({_id: 13, text: "박정수AB"}));
- testRegexFindAggForKey(13,
- {input: "$text", regex: String.raw `(*UCP)^[[:alpha:]]+`},
- {"match": "박정수AB", "idx": 0, "captures": []});
-
- // No match when options are not set.
- testRegexFindAggForKey(12, {input: "$text", regex: String.raw `^(\w+)`}, null);
- testRegexFindAggForKey(12, {input: "$text", regex: "^[[:digit:]]"}, null);
- testRegexFindAggForKey(2, {input: "$text", regex: "^[[:alpha:]]+$"}, null);
- })();
-
- (function testErrors() {
- coll.drop();
- assert.commandWorked(coll.insert({text: "string"}));
- // Null object.
- testRegexFindAggException(null, 51103);
- // Incorrect object parameter.
- testRegexFindAggException("incorrect type", 51103);
- // Test malformed regex.
- testRegexFindAggException({input: "$text", regex: "[0-9"}, 51111);
- // Malformed regex because start options not at the beginning.
- testRegexFindAggException({input: "$text", regex: "^(*UCP)[[:alpha:]]+$"}, 51111);
- testRegexFindAggException({input: "$text", regex: "((*UCP)[[:alpha:]]+$)"}, 51111);
- // At least one of the 'input' field is not string.
- assert.commandWorked(coll.insert({a: "string"}));
- assert.commandWorked(coll.insert({a: {b: "object"}}));
- testRegexFindAggException({input: "$a", regex: "valid"}, 51104);
- // 'regex' field is not string or regex.
- testRegexFindAggException({input: "$text", regex: ["incorrect"]}, 51105);
- // 'options' field is not string.
- testRegexFindAggException({input: "$text", regex: "valid", options: 123}, 51106);
- // Incorrect 'options' flag.
- testRegexFindAggException({input: "$text", regex: "valid", options: 'a'}, 51108);
- // 'options' are case-sensitive.
- testRegexFindAggException({input: "$text", regex: "valid", options: "I"}, 51108);
- // Options specified in both 'regex' and 'options'.
- testRegexFindAggException({input: "$text", regex: /(m(p))/i, options: "i"}, 51107);
- testRegexFindAggException({input: "$text", regex: /(m(p))/i, options: "x"}, 51107);
- testRegexFindAggException({input: "$text", regex: /(m(p))/m, options: ""}, 51107);
- // 'regex' as string with null characters.
- testRegexFindAggException({input: "$text", regex: "sasd\0", options: "i"}, 51109);
- testRegexFindAggException({input: "$text", regex: "sa\x00sd", options: "i"}, 51109);
- // 'options' as string with null characters.
- testRegexFindAggException({input: "$text", regex: /(m(p))/, options: "i\0"}, 51110);
- testRegexFindAggException({input: "$text", regex: /(m(p))/, options: "i\x00"}, 51110);
-
- })();
-
- (function testMultipleMatches() {
- coll.drop();
- assert.commandWorked(coll.insert({a: "string1"}));
- assert.commandWorked(coll.insert({a: "string2"}));
- // Both match.
- testRegexFindAgg({input: "$a", regex: "(^str.*)"}, [
- {"matches": {"match": "string1", "idx": 0, "captures": ["string1"]}},
- {"matches": {"match": "string2", "idx": 0, "captures": ["string2"]}}
- ]);
- // Only one match.
- testRegexFindAgg({input: "$a", regex: "(^.*[0-1]$)"}, [
- {"matches": null},
- {"matches": {"match": "string1", "idx": 0, "captures": ["string1"]}}
- ]);
- // None match.
- testRegexFindAgg({input: "$a", regex: "(^.*[3-9]$)"},
- [{"matches": null}, {"matches": null}]);
- })();
-
- (function testInsideCondOperator() {
- coll.drop();
- assert.commandWorked(
- coll.insert({_id: 0, level: "Public Knowledge", info: "Company Name"}));
- assert.commandWorked(
- coll.insert({_id: 1, level: "Private Information", info: "Company Secret"}));
-
- const result =
- coll.aggregate([{
- "$project": {
- "information": {
- "$cond": [
- {
- "$eq":
- [{"$regexFind": {input: "$level", regex: /public/i}}, null]
- },
- "REDACTED",
- "$info"
- ]
- }
- }
- }])
- .toArray();
- assert.eq(result, [
- {"_id": 0, "information": "Company Name"},
- {"_id": 1, "information": "REDACTED"},
- ]);
- })();
-}());