5 files changed, 728 insertions, 457 deletions
diff --git a/jstests/aggregation/expressions/regex.js b/jstests/aggregation/expressions/regex.js
new file mode 100644
index 00000000000..31b04b54976
--- /dev/null
+++ b/jstests/aggregation/expressions/regex.js
@@ -0,0 +1,438 @@
+/*
+ * Tests for the $regexFind and $regexFindAll aggregation expressions.
+ */
+(function() {
+    'use strict';
+    load("jstests/aggregation/extras/utils.js");  // For assertErrorCode().
+    const coll = db.regex_find_expr;
+    coll.drop();
+
+    function testRegex(expression, inputObj, expectedOutput) {
+        const result =
+            coll.aggregate([
+                    {"$project": {_id: 0, "matches": {[expression]: inputObj}}},
+                    {"$sort": {"matches": 1}}  // Sort to ensure the documents are returned in a
+                                               // deterministic order for sharded clusters.
+                ])
+                .toArray();
+        assert.eq(result, expectedOutput);
+    }
+    function testRegexForKey(expression, key, inputObj, expectedMatchObj) {
+        const result =
+            coll.aggregate(
+                    [{"$match": {"_id": key}}, {"$project": {"matches": {[expression]: inputObj}}}])
+                .toArray();
+        const expectedOutput = [{"_id": key, "matches": expectedMatchObj}];
+        assert.eq(result, expectedOutput);
+    }
+
+    /**
+     * This function validates the output against both $regexFind and $regexFindAll expressions.
+     */
+    function testRegexFindAgg(inputObj, expectedOutputForFindAll) {
+        testRegex("$regexFindAll", inputObj, expectedOutputForFindAll);
+
+        // For each output document, take the first element of its "matches" array (or null when
+        // the array is empty). This converts the '$regexFindAll' output to '$regexFind' output.
+        const expectedOutputForFind = expectedOutputForFindAll.map(
+            (element) => ({matches: element.matches.length == 0 ? null : element.matches[0]}));
+        testRegex("$regexFind", inputObj, expectedOutputForFind);
+    }
+
+    /**
+     * This function validates the output against both $regexFind and $regexFindAll expressions.
+     */
+    function testRegexFindAggForKey(key, inputObj, expectedOutputForFindAll) {
+        testRegexForKey("$regexFindAll", key, inputObj, expectedOutputForFindAll);
+        const expectedOutputForFind =
+            expectedOutputForFindAll.length == 0 ? null : expectedOutputForFindAll[0];
+        testRegexForKey("$regexFind", key, inputObj, expectedOutputForFind);
+    }
+
+    /**
+     * This function validates that both $regexFind and $regexFindAll fail with 'exceptionCode'.
+     */
+    function testRegexAggException(inputObj, exceptionCode) {
+        assertErrorCode(
+            coll, [{"$project": {"matches": {"$regexFindAll": inputObj}}}], exceptionCode);
+        assertErrorCode(coll, [{"$project": {"matches": {"$regexFind": inputObj}}}], exceptionCode);
+    }
+
+    (function testWithSingleMatch() {
+        // Regex in string notation, find with multiple captures and matches.
+        assert.commandWorked(coll.insert({_id: 0, text: "Simple Example "}));
+        testRegexFindAggForKey(0, {input: "$text", regex: "(m(p))"}, [
+            {"match": "mp", "idx": 2, "captures": ["mp", "p"]},
+            {"match": "mp", "idx": 10, "captures": ["mp", "p"]}
+        ]);
+        // Regex in json syntax, with multiple captures and matches.
+        testRegexFindAggForKey(0, {input: "$text", regex: /(m(p))/}, [
+            {"match": "mp", "idx": 2, "captures": ["mp", "p"]},
+            {"match": "mp", "idx": 10, "captures": ["mp", "p"]}
+        ]);
+        // Verify no overlapping match sub-strings.
+        assert.commandWorked(coll.insert({_id: 112, text: "aaaaa aaaa"}));
+        testRegexFindAggForKey(112, {input: "$text", regex: /(aa)/}, [
+            {"match": "aa", "idx": 0, "captures": ["aa"]},
+            {"match": "aa", "idx": 2, "captures": ["aa"]},
+            {"match": "aa", "idx": 6, "captures": ["aa"]},
+            {"match": "aa", "idx": 8, "captures": ["aa"]}
+        ]);
+        testRegexFindAggForKey(112, {input: "$text", regex: /(aa)+/}, [
+            {"match": "aaaa", "idx": 0, "captures": ["aa"]},
+            {"match": "aaaa", "idx": 6, "captures": ["aa"]}
+        ]);
+        // Verify greedy match.
+        testRegexFindAggForKey(112, {input: "$text", regex: /(a+)/}, [
+            {"match": "aaaaa", "idx": 0, "captures": ["aaaaa"]},
+            {"match": "aaaa", "idx": 6, "captures": ["aaaa"]},
+        ]);
+        testRegexFindAggForKey(112, {input: "$text", regex: /(a)+/}, [
+            {"match": "aaaaa", "idx": 0, "captures": ["a"]},
+            {"match": "aaaa", "idx": 6, "captures": ["a"]},
+        ]);
+        // Verify lazy match.
+        assert.commandWorked(coll.insert({_id: 113, text: "aaa aa"}));
+        testRegexFindAggForKey(113, {input: "$text", regex: /(a+?)/}, [
+            {"match": "a", "idx": 0, "captures": ["a"]},
+            {"match": "a", "idx": 1, "captures": ["a"]},
+            {"match": "a", "idx": 2, "captures": ["a"]},
+            {"match": "a", "idx": 4, "captures": ["a"]},
+            {"match": "a", "idx": 5, "captures": ["a"]}
+        ]);
+        testRegexFindAggForKey(113, {input: "$text", regex: /(a*?)/}, [
+            {"match": "", "idx": 0, "captures": [""]},
+            {"match": "", "idx": 1, "captures": [""]},
+            {"match": "", "idx": 2, "captures": [""]},
+            {"match": "", "idx": 3, "captures": [""]},
+            {"match": "", "idx": 4, "captures": [""]},
+            {"match": "", "idx": 5, "captures": [""]}
+        ]);
+
+        // Regex string groups within group.
+        testRegexFindAggForKey(
+            0,
+            {input: "$text", regex: "((S)(i)(m)(p)(l)(e))"},
+            [{"match": "Simple", "idx": 0, "captures": ["Simple", "S", "i", "m", "p", "l", "e"]}]);
+        testRegexFindAggForKey(
+            0,
+            {input: "$text", regex: "(S)(i)(m)((p)(l)(e))"},
+            [{"match": "Simple", "idx": 0, "captures": ["S", "i", "m", "ple", "p", "l", "e"]}]);
+
+        // Regex email pattern. The backslash is doubled so the pattern keeps its escaped dot.
+        assert.commandWorked(
+            coll.insert({_id: 1, text: "Some field text with email mongo@mongodb.com"}));
+        testRegexFindAggForKey(
+            1,
+            {input: "$text", regex: "([a-zA-Z0-9._-]+)@[a-zA-Z0-9._-]+\\.[a-zA-Z0-9._-]+"},
+            [{"match": "mongo@mongodb.com", "idx": 27, "captures": ["mongo"]}]);
+
+        // Regex digits.
+        assert.commandWorked(coll.insert({_id: 5, text: "Text with 02 digits"}));
+        testRegexFindAggForKey(
+            5, {input: "$text", regex: /[0-9]+/}, [{"match": "02", "idx": 10, "captures": []}]);
+        testRegexFindAggForKey(
+            5, {input: "$text", regex: /(\d+)/}, [{"match": "02", "idx": 10, "captures": ["02"]}]);
+
+        // Regex a non-capture group.
+        assert.commandWorked(coll.insert({_id: 6, text: "1,2,3,4,5,6,7,8,9,10"}));
+        testRegexFindAggForKey(6,
+                               {input: "$text", regex: /^(?:1|a)\,([0-9]+)/},
+                               [{"match": "1,2", "idx": 0, "captures": ["2"]}]);
+
+        // Regex quantifier.
+        assert.commandWorked(coll.insert({_id: 7, text: "abc12defgh345jklm"}));
+        testRegexFindAggForKey(
+            7, {input: "$text", regex: /[0-9]{3}/}, [{"match": "345", "idx": 10, "captures": []}]);
+
+        // Regex case insensitive option.
+        assert.commandWorked(coll.insert({_id: 8, text: "This Is Camel Case"}));
+        testRegexFindAggForKey(8, {input: "$text", regex: /camel/}, []);
+        testRegexFindAggForKey(
+            8, {input: "$text", regex: /camel/i}, [{"match": "Camel", "idx": 8, "captures": []}]);
+        testRegexFindAggForKey(8,
+                               {input: "$text", regex: /camel/, options: "i"},
+                               [{"match": "Camel", "idx": 8, "captures": []}]);
+        testRegexFindAggForKey(8,
+                               {input: "$text", regex: "camel", options: "i"},
+                               [{"match": "Camel", "idx": 8, "captures": []}]);
+
+        // Regex multi line option.
+        assert.commandWorked(coll.insert({_id: 9, text: "Foo line1\nFoo line2\nFoo line3"}));
+        // Verify no match with options flag off.
+        testRegexFindAggForKey(9, {input: "$text", regex: /^Foo line\d$/}, []);
+        // Verify match when flag is on.
+        testRegexFindAggForKey(9, {input: "$text", regex: /(^Foo line\d$)/m}, [
+            {"match": "Foo line1", "idx": 0, "captures": ["Foo line1"]},
+            {"match": "Foo line2", "idx": 10, "captures": ["Foo line2"]},
+            {"match": "Foo line3", "idx": 20, "captures": ["Foo line3"]}
+        ]);
+
+        // Regex single line option.
+        testRegexFindAggForKey(9, {input: "$text", regex: "Foo.*line"}, [
+            {"match": "Foo line", "idx": 0, "captures": []},
+            {"match": "Foo line", "idx": 10, "captures": []},
+            {"match": "Foo line", "idx": 20, "captures": []}
+        ]);
+        testRegexFindAggForKey(
+            9,
+            {input: "$text", regex: "Foo.*line", options: "s"},
+            [{"match": "Foo line1\nFoo line2\nFoo line", "idx": 0, "captures": []}]);
+
+        // Regex extended option.
+        testRegexFindAggForKey(9, {input: "$text", regex: "F o o # a comment"}, []);
+        testRegexFindAggForKey(9, {input: "$text", regex: "F o o # a comment", options: "x"}, [
+            {"match": "Foo", "idx": 0, "captures": []},
+            {"match": "Foo", "idx": 10, "captures": []},
+            {"match": "Foo", "idx": 20, "captures": []}
+        ]);
+        testRegexFindAggForKey(
+            9, {input: "$text", regex: "F o o # a comment \n\n# ignored", options: "x"}, [
+                {"match": "Foo", "idx": 0, "captures": []},
+                {"match": "Foo", "idx": 10, "captures": []},
+                {"match": "Foo", "idx": 20, "captures": []}
+            ]);
+        testRegexFindAggForKey(9, {input: "$text", regex: "(F o o) # a comment", options: "x"}, [
+            {"match": "Foo", "idx": 0, "captures": ["Foo"]},
+            {"match": "Foo", "idx": 10, "captures": ["Foo"]},
+            {"match": "Foo", "idx": 20, "captures": ["Foo"]}
+        ]);
+
+        // Regex pattern from a document field value.
+        assert.commandWorked(
+            coll.insert({_id: 10, text: "Simple Value Example", pattern: "(m(p))"}));
+        testRegexFindAggForKey(10, {input: "$text", regex: "$pattern"}, [
+            {"match": "mp", "idx": 2, "captures": ["mp", "p"]},
+            {"match": "mp", "idx": 16, "captures": ["mp", "p"]}
+        ]);
+        assert.commandWorked(coll.insert({_id: 11, text: "OtherText", pattern: /(T(e))xt$/}));
+        testRegexFindAggForKey(11,
+                               {input: "$text", regex: "$pattern"},
+                               [{"match": "Text", "idx": 5, "captures": ["Te", "e"]}]);
+
+        // Empty input matches empty regex.
+        testRegexFindAggForKey(
+            0, {input: "", regex: ""}, [{"match": "", "idx": 0, "captures": []}]);
+        // Empty captures groups.
+        testRegexFindAggForKey(0, {input: "bbbb", regex: "()"}, [
+            {"match": "", "idx": 0, "captures": [""]},
+            {"match": "", "idx": 1, "captures": [""]},
+            {"match": "", "idx": 2, "captures": [""]},
+            {"match": "", "idx": 3, "captures": [""]}
+        ]);
+        // No matches.
+        testRegexFindAggForKey(0, {input: "$text", regex: /foo/}, []);
+        // Regex null.
+        testRegexFindAggForKey(0, {input: "$text", regex: null}, []);
+        // Regex not present.
+        testRegexFindAggForKey(0, {input: "$text"}, []);
+        // Input not present.
+        testRegexFindAggForKey(0, {regex: /valid/}, []);
+        // Input null.
+        testRegexFindAggForKey(0, {input: null, regex: /valid/}, []);
+        // Empty object.
+        testRegexFindAggForKey(0, {}, []);
+    })();
+
+    (function testWithStartOptions() {
+        coll.drop();
+        assert.commandWorked(coll.insert({_id: 2, text: "cafétéria"}));
+        assert.commandWorked(coll.insert({_id: 3, text: "ab\ncd"}));
+
+        // LIMIT_MATCH option to limit the number of comparisons PCRE does internally.
+        testRegexFindAggForKey(2, {input: "$text", regex: "(*LIMIT_MATCH=1)fé"}, []);
+        testRegexFindAggForKey(2,
+                               {input: "$text", regex: "(*LIMIT_MATCH=3)(fé)"},
+                               [{"match": "fé", "idx": 2, "captures": ["fé"]}]);
+
+        // (*LF) sets the newline convention to line feed (UNIX-like) and (*CR) to carriage return
+        // only. So '.' matches '\n' under (*CR) but not under (*LF).
+        testRegexFindAggForKey(3, {input: "$text", regex: "(*LF)ab.cd"}, []);
+        testRegexFindAggForKey(3,
+                               {input: "$text", regex: "(*CR)ab.cd"},
+                               [{"match": "ab\ncd", "idx": 0, "captures": []}]);
+
+        // Multiple start options.
+        testRegexFindAggForKey(2,
+                               {input: "$text", regex: String.raw `(*LIMIT_MATCH=5)(*UCP)^(\w+)`},
+                               [{"match": "cafétéria", "idx": 0, "captures": ["cafétéria"]}]);
+        testRegexFindAggForKey(
+            2, {input: "$text", regex: String.raw `(*LIMIT_MATCH=1)(*UCP)^(\w+)`}, []);
+    })();
+
+    (function testWithUnicodeData() {
+        coll.drop();
+        // Unicode index counting.
+        assert.commandWorked(coll.insert({_id: 2, text: "cafétéria"}));
+        assert.commandWorked(coll.insert({_id: 3, text: "मा०गो डीबि"}));
+        testRegexFindAggForKey(
+            2, {input: "$text", regex: "té"}, [{"match": "té", "idx": 4, "captures": []}]);
+        testRegexFindAggForKey(
+            3, {input: "$text", regex: /म/}, [{"match": "म", "idx": 0, "captures": []}]);
+        // Unicode with capture group.
+        testRegexFindAggForKey(3,
+                               {input: "$text", regex: /(गो )/},
+                               [{"match": "गो ", "idx": 3, "captures": ["गो "]}]);
+        // Test that regexes support Unicode character properties.
+        testRegexFindAggForKey(2, {input: "$text", regex: String.raw `\p{Hangul}`}, []);
+        testRegexFindAggForKey(2,
+                               {input: "$text", regex: String.raw `\p{Latin}+$`},
+                               [{"match": "cafétéria", "idx": 0, "captures": []}]);
+        // Test that the (*UTF) and (*UTF8) options are accepted for unicode characters.
+        assert.commandWorked(coll.insert({_id: 12, text: "༢༣༤༤༤༥12༥A"}));
+        testRegexFindAggForKey(12, {input: "$text", regex: "(*UTF8)༤"}, [
+            {"match": "༤", "idx": 2, "captures": []},
+            {"match": "༤", "idx": 3, "captures": []},
+            {"match": "༤", "idx": 4, "captures": []}
+        ]);
+        testRegexFindAggForKey(12, {input: "$text", regex: "(*UTF)༤"}, [
+            {"match": "༤", "idx": 2, "captures": []},
+            {"match": "༤", "idx": 3, "captures": []},
+            {"match": "༤", "idx": 4, "captures": []}
+        ]);
+        // For ASCII characters.
+        assert.commandWorked(coll.insert({_id: 4, text: "123444"}));
+        testRegexFindAggForKey(4,
+                               {input: "$text", regex: "(*UTF8)(44)"},
+                               [{"match": "44", "idx": 3, "captures": ["44"]}]);
+        testRegexFindAggForKey(4,
+                               {input: "$text", regex: "(*UTF)(44)"},
+                               [{"match": "44", "idx": 3, "captures": ["44"]}]);
+
+        // When the (*UCP) option is specified, Unicode "word" characters are included in the '\w'
+        // character type.
+        testRegexFindAggForKey(12,
+                               {input: "$text", regex: String.raw `(*UCP)^(\w+)`},
+                               [{"match": "༢༣༤༤༤༥12༥A", "idx": 0, "captures": ["༢༣༤༤༤༥12༥A"]}]);
+        // When the (*UCP) option is specified, [:digit:] becomes \p{Nd} and matches all Unicode
+        // decimal digit characters.
+        testRegexFindAggForKey(12,
+                               {input: "$text", regex: "(*UCP)^[[:digit:]]+"},
+                               [{"match": "༢༣༤༤༤༥12༥", "idx": 0, "captures": []}]);
+        testRegexFindAggForKey(12, {input: "$text", regex: "(*UCP)[[:digit:]]+$"}, []);
+        // When the (*UCP) option is specified, [:alpha:] becomes \p{L} and matches all Unicode
+        // alphabetic characters.
+        assert.commandWorked(coll.insert({_id: 13, text: "박정수AB"}));
+        testRegexFindAggForKey(13,
+                               {input: "$text", regex: String.raw `(*UCP)^[[:alpha:]]+`},
+                               [{"match": "박정수AB", "idx": 0, "captures": []}]);
+
+        // No match when options are not set.
+        testRegexFindAggForKey(12, {input: "$text", regex: String.raw `^(\w+)`}, []);
+        testRegexFindAggForKey(12, {input: "$text", regex: "^[[:digit:]]"}, []);
+        testRegexFindAggForKey(2, {input: "$text", regex: "^[[:alpha:]]+$"}, []);
+    })();
+
+    (function testErrors() {
+        coll.drop();
+        assert.commandWorked(coll.insert({text: "string"}));
+        // Null object.
+        testRegexAggException(null, 51103);
+        // Incorrect object parameter.
+        testRegexAggException("incorrect type", 51103);
+        // Test malformed regex.
+        testRegexAggException({input: "$text", regex: "[0-9"}, 51111);
+        testRegexAggException({regex: "[a-c"}, 51111);
+        // Malformed regex because start options not at the beginning.
+        testRegexAggException({input: "$text", regex: "^(*UCP)[[:alpha:]]+$"}, 51111);
+        testRegexAggException({input: "$text", regex: "((*UCP)[[:alpha:]]+$)"}, 51111);
+        // The 'input' field is not a string in at least one document.
+        assert.commandWorked(coll.insert({a: "string"}));
+        assert.commandWorked(coll.insert({a: {b: "object"}}));
+        testRegexAggException({input: "$a", regex: "valid"}, 51104);
+        testRegexAggException({input: "$a"}, 51104);
+        // 'regex' field is not string or regex.
+        testRegexAggException({input: "$text", regex: ["incorrect"]}, 51105);
+        // 'options' field is not string.
+        testRegexAggException({input: "$text", regex: "valid", options: 123}, 51106);
+        // Incorrect 'options' flag.
+        testRegexAggException({input: "$text", regex: "valid", options: 'a'}, 51108);
+        // 'options' are case-sensitive.
+        testRegexAggException({input: "$text", regex: "valid", options: "I"}, 51108);
+        // Options specified in both 'regex' and 'options'.
+        testRegexAggException({input: "$text", regex: /(m(p))/i, options: "i"}, 51107);
+        testRegexAggException({input: "$text", regex: /(m(p))/i, options: "x"}, 51107);
+        testRegexAggException({input: "$text", regex: /(m(p))/m, options: ""}, 51107);
+        // 'regex' as string with null characters.
+        testRegexAggException({input: "$text", regex: "sasd\0", options: "i"}, 51109);
+        testRegexAggException({regex: "sa\x00sd", options: "i"}, 51109);
+        // 'options' as string with null characters.
+        testRegexAggException({input: "$text", regex: /(m(p))/, options: "i\0"}, 51110);
+        testRegexAggException({input: "$text", options: "i\x00"}, 51110);
+    })();
+
+    (function testMultipleMatches() {
+        coll.drop();
+        assert.commandWorked(coll.insert({a: "string1string2"}));
+        assert.commandWorked(coll.insert({a: "string3 string4"}));
+        // Both match.
+        testRegexFindAgg({input: "$a", regex: "(str.*?[0-9])"}, [
+            {
+              "matches": [
+                  {"match": "string1", "idx": 0, "captures": ["string1"]},
+                  {"match": "string2", "idx": 7, "captures": ["string2"]}
+              ]
+            },
+            {
+              "matches": [
+                  {"match": "string3", "idx": 0, "captures": ["string3"]},
+                  {"match": "string4", "idx": 8, "captures": ["string4"]}
+              ]
+            }
+        ]);
+        // Only one match.
+        testRegexFindAgg({input: "$a", regex: "(^.*[0-2]$)"}, [
+            {"matches": []},
+            {"matches": [{"match": "string1string2", "idx": 0, "captures": ["string1string2"]}]}
+
+        ]);
+        // None match.
+        testRegexFindAgg({input: "$a", regex: "(^.*[5-9]$)"}, [{"matches": []}, {"matches": []}]);
+    })();
+
+    (function testInsideCondOperator() {
+        coll.drop();
+        assert.commandWorked(
+            coll.insert({_id: 0, level: "Public Knowledge", info: "Company Name"}));
+        assert.commandWorked(
+            coll.insert({_id: 1, level: "Private Information", info: "Company Secret"}));
+        const expectedResults =
+            [{"_id": 0, "information": "Company Name"}, {"_id": 1, "information": "REDACTED"}];
+        // For $regexFindAll.
+        let result =
+            coll.aggregate([{
+                    "$project": {
+                        "information": {
+                            "$cond": [
+                                {
+                                  "$eq":
+                                      [{"$regexFindAll": {input: "$level", regex: /public/i}}, []]
+                                },
+                                "REDACTED",
+                                "$info"
+                            ]
+                        }
+                    }
+                }])
+                .toArray();
+        assert.eq(result, expectedResults);
+        // For $regexFind.
+        result =
+            coll.aggregate([{
+                    "$project": {
+                        "information": {
+                            "$cond": [
+                                {
+                                  "$eq":
+                                      [{"$regexFind": {input: "$level", regex: /public/i}}, null]
+                                },
+                                "REDACTED",
+                                "$info"
+                            ]
+                        }
+                    }
+                }])
+                .toArray();
+        assert.eq(result, expectedResults);
+    })();
+}());
diff --git a/jstests/aggregation/expressions/regexFind.js b/jstests/aggregation/expressions/regexFind.js
deleted file mode 100644
index e5743a453c5..00000000000
--- a/jstests/aggregation/expressions/regexFind.js
+++ /dev/null
@@ -1,339 +0,0 @@
-/*
- * Tests for $regexFind aggregation expression.
- */
-(function() {
-    'use strict';
-
-    load("jstests/aggregation/extras/utils.js");  // For assertErrorCode().
-
-    const coll = db.regex_find_expr;
-    coll.drop();
-
-    function testRegexFindAgg(regexFind, expectedOutput) {
-        const result =
-            coll.aggregate([
-                    {"$project": {_id: 0, "matches": {"$regexFind": regexFind}}},
-                    {"$sort": {"matches": 1}}  // Ensure that the documents are returned in a
-                                               // deterministic order for sharded clusters.
-                ])
-                .toArray();
-        assert.eq(result, expectedOutput);
-    }
-    function testRegexFindAggForKey(key, regexFind, expectedMatchObj) {
-        const result = coll.aggregate([
-                               {"$match": {"_id": key}},
-                               {"$project": {"matches": {"$regexFind": regexFind}}}
-                           ])
-                           .toArray();
-        const expectedOutput = [{"_id": key, "matches": expectedMatchObj}];
-        assert.eq(result, expectedOutput);
-    }
-    function testRegexFindAggException(regexFind, exceptionCode) {
-        assertErrorCode(
-            coll, [{"$project": {"matches": {"$regexFind": regexFind}}}], exceptionCode);
-    }
-
-    (function testWithSingleMatch() {
-        // Regex in string notation, find with multiple captures.
-        assert.commandWorked(coll.insert({_id: 0, text: "Simple Example"}));
-        testRegexFindAggForKey(0,
-                               {input: "$text", regex: "(m(p))"},
-                               {"match": "mp", "idx": 2, "captures": ["mp", "p"]});
-        // Regex in json syntax, with multiple captures.
-        testRegexFindAggForKey(0, {input: "$text", regex: /(S)(i)(m)(p)(l)(e) (Ex)(am)(p)(le)/}, {
-            "match": "Simple Example",
-            "idx": 0,
-            "captures": ["S", "i", "m", "p", "l", "e", "Ex", "am", "p", "le"]
-        });
-
-        // Regex string groups within group.
-        testRegexFindAggForKey(
-            0,
-            {input: "$text", regex: "((S)(i)(m)(p)(l)(e))"},
-            {"match": "Simple", "idx": 0, "captures": ["Simple", "S", "i", "m", "p", "l", "e"]});
-        testRegexFindAggForKey(
-            0,
-            {input: "$text", regex: "(S)(i)(m)((p)(l)(e))"},
-            {"match": "Simple", "idx": 0, "captures": ["S", "i", "m", "ple", "p", "l", "e"]});
-
-        // Regex email pattern.
-        assert.commandWorked(
-            coll.insert({_id: 1, text: "Some field text with email mongo@mongodb.com"}));
-        testRegexFindAggForKey(
-            1,
-            {input: "$text", regex: "([a-zA-Z0-9._-]+)@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+"},
-            {"match": "mongo@mongodb.com", "idx": 27, "captures": ["mongo"]});
-
-        // Regex digits.
-        assert.commandWorked(coll.insert({_id: 5, text: "Text with 02 digits"}));
-        testRegexFindAggForKey(
-            5, {input: "$text", regex: /[0-9]+/}, {"match": "02", "idx": 10, "captures": []});
-        testRegexFindAggForKey(
-            5, {input: "$text", regex: /(\d+)/}, {"match": "02", "idx": 10, "captures": ["02"]});
-
-        // Regex a non-capture group.
-        assert.commandWorked(coll.insert({_id: 6, text: "1,2,3,4,5,6,7,8,9,10"}));
-        testRegexFindAggForKey(6,
-                               {input: "$text", regex: /^(?:1|a)\,([0-9]+)/},
-                               {"match": "1,2", "idx": 0, "captures": ["2"]});
-
-        // Regex quantifier.
-        assert.commandWorked(coll.insert({_id: 7, text: "abc12defgh345jklm"}));
-        testRegexFindAggForKey(
-            7, {input: "$text", regex: /[0-9]{3}/}, {"match": "345", "idx": 10, "captures": []});
-
-        // Regex case insensitive option.
-        assert.commandWorked(coll.insert({_id: 8, text: "This Is Camel Case"}));
-        testRegexFindAggForKey(8, {input: "$text", regex: /camel/}, null);
-        testRegexFindAggForKey(
-            8, {input: "$text", regex: /camel/i}, {"match": "Camel", "idx": 8, "captures": []});
-        testRegexFindAggForKey(8,
-                               {input: "$text", regex: /camel/, options: "i"},
-                               {"match": "Camel", "idx": 8, "captures": []});
-        testRegexFindAggForKey(8,
-                               {input: "$text", regex: "camel", options: "i"},
-                               {"match": "Camel", "idx": 8, "captures": []});
-
-        // Regex multi line option.
-        assert.commandWorked(coll.insert({_id: 9, text: "Foo line1\nFoo line2\nFoo line3"}));
-        // Verify no match with options flag off.
-        testRegexFindAggForKey(9, {input: "$text", regex: /^Foo line\d$/}, null);
-        // Verify match when flag is on.
-        testRegexFindAggForKey(9,
-                               {input: "$text", regex: /^Foo line\d$/m},
-                               {"match": "Foo line1", "idx": 0, "captures": []});
-
-        // Regex single line option.
-        testRegexFindAggForKey(9,
-                               {input: "$text", regex: "Foo.*line"},
-                               {"match": "Foo line", "idx": 0, "captures": []});
-        testRegexFindAggForKey(
-            9,
-            {input: "$text", regex: "Foo.*line", options: "s"},
-            {"match": "Foo line1\nFoo line2\nFoo line", "idx": 0, "captures": []});
-
-        // Regex extended option.
-        testRegexFindAggForKey(9, {input: "$text", regex: "F o o # a comment"}, null);
-        testRegexFindAggForKey(9,
-                               {input: "$text", regex: "F o o # a comment", options: "x"},
-                               {"match": "Foo", "idx": 0, "captures": []});
-        testRegexFindAggForKey(
-            9,
-            {input: "$text", regex: "F o o # a comment \n\n# ignored", options: "x"},
-            {"match": "Foo", "idx": 0, "captures": []});
-        testRegexFindAggForKey(9,
-                               {input: "$text", regex: "(F o o) # a comment", options: "x"},
-                               {"match": "Foo", "idx": 0, "captures": ["Foo"]});
-
-        // Regex pattern from a document field value.
-        assert.commandWorked(coll.insert({_id: 10, text: "Simple Value", pattern: "(m(p))"}));
-        testRegexFindAggForKey(10,
-                               {input: "$text", regex: "$pattern"},
-                               {"match": "mp", "idx": 2, "captures": ["mp", "p"]});
-        assert.commandWorked(coll.insert({_id: 11, text: "OtherText", pattern: /(T(e))xt$/}));
-        testRegexFindAggForKey(11,
-                               {input: "$text", regex: "$pattern"},
-                               {"match": "Text", "idx": 5, "captures": ["Te", "e"]});
-
-        // 'regex' as object with null characters.
-        assert.commandWorked(coll.insert({_id: 12, text: "Null\0 charac\0ters"}));
-        testRegexFindAggForKey(12, {input: "$text", regex: /((Null)(\0))( )(charac\0t)/}, {
-            "match": "Null\0 charac\0t",
-            "idx": 0,
-            "captures": ["Null\0", "Null", "\0", " ", "charac\0t"]
-        });
-        testRegexFindAggForKey(
-            12,
-            {input: "$text", regex: /(\x00)( )(charac\x00t)/},
-            {"match": "\0 charac\x00t", "idx": 4, "captures": ["\x00", " ", "charac\0t"]});
-        // 'regex' as string with escaped null characters.
-        testRegexFindAggForKey(12,
-                               {input: "$text", regex: "l\\0 charac\\0ter.*$"},
-                               {"match": "l\0 charac\0ters", "idx": 3, "captures": []});
-        // No match with null characters in input.
-        testRegexFindAggForKey(12, {input: "$text", regex: /Null c/}, null);
-        // No match with null characters in regex.
-        testRegexFindAggForKey(12, {input: "$text", regex: /Nul\0l/}, null);
-
-        // No matches.
-        testRegexFindAggForKey(0, {input: "$text", regex: /foo/}, null);
-        // Regex null.
-        testRegexFindAggForKey(0, {input: "$text", regex: null}, null);
-        // Regex not present.
-        testRegexFindAggForKey(0, {input: "$text"}, null);
-        // Input not present.
-        testRegexFindAggForKey(0, {regex: /valid/}, null);
-        // Input null.
-        testRegexFindAggForKey(0, {input: null, regex: /valid/}, null);
-        // Empty object.
-        testRegexFindAggForKey(0, {}, null);
-    })();
-
-    (function testWithStartOptions() {
-        coll.drop();
-        assert.commandWorked(coll.insert({_id: 2, text: "cafétéria"}));
-        assert.commandWorked(coll.insert({_id: 3, text: "ab\ncd"}));
-
-        // LIMIT_MATCH option to limit the number of comparisons PCRE does internally.
-        testRegexFindAggForKey(2, {input: "$text", regex: "(*LIMIT_MATCH=1)fé"}, null);
-        testRegexFindAggForKey(2,
-                               {input: "$text", regex: "(*LIMIT_MATCH=3)(fé)"},
-                               {"match": "fé", "idx": 2, "captures": ["fé"]});
-
-        // (*LF) will change the feed system to UNIX like and (*CR) to windows like. So '\n' would
-        // match '.' with CR but not LF.
-        testRegexFindAggForKey(3, {input: "$text", regex: "(*LF)ab.cd"}, null);
-        testRegexFindAggForKey(3,
-                               {input: "$text", regex: "(*CR)ab.cd"},
-                               {"match": "ab\ncd", "idx": 0, "captures": []});
-
-        // Multiple start options.
-        testRegexFindAggForKey(2,
-                               {input: "$text", regex: String.raw `(*LIMIT_MATCH=5)(*UCP)^(\w+)`},
-                               {"match": "cafétéria", "idx": 0, "captures": ["cafétéria"]});
-        testRegexFindAggForKey(
-            2, {input: "$text", regex: String.raw `(*LIMIT_MATCH=1)(*UCP)^(\w+)`}, null);
-    })();
-
-    (function testWithUnicodeData() {
-        coll.drop();
-        // Unicode index counting.
-        assert.commandWorked(coll.insert({_id: 2, text: "cafétéria"}));
-        assert.commandWorked(coll.insert({_id: 3, text: "मा०गो डीबि"}));
-        testRegexFindAggForKey(
-            2, {input: "$text", regex: "té"}, {"match": "té", "idx": 4, "captures": []});
-        testRegexFindAggForKey(
-            3, {input: "$text", regex: /म/}, {"match": "म", "idx": 0, "captures": []});
-        // Unicode with capture group.
-        testRegexFindAggForKey(3,
-                               {input: "$text", regex: /(गो )/},
-                               {"match": "गो ", "idx": 3, "captures": ["गो "]});
-        // Test that regexes support Unicode character properties.
-        testRegexFindAggForKey(2, {input: "$text", regex: String.raw `\p{Hangul}`}, null);
-        testRegexFindAggForKey(2,
-                               {input: "$text", regex: String.raw `\p{Latin}+$`},
-                               {"match": "cafétéria", "idx": 0, "captures": []});
-        // Test that the (*UTF) and (*UTF8) options are accepted for unicode characters.
-        assert.commandWorked(coll.insert({_id: 12, text: "༢༣༤༤༤༥12༥A"}));
-        testRegexFindAggForKey(
-            12, {input: "$text", regex: "(*UTF8)༤"}, {"match": "༤", "idx": 2, "captures": []});
-        testRegexFindAggForKey(
-            12, {input: "$text", regex: "(*UTF)༤"}, {"match": "༤", "idx": 2, "captures": []});
-        // For ASCII characters.
-        assert.commandWorked(coll.insert({_id: 4, text: "123444"}));
-        testRegexFindAggForKey(4,
-                               {input: "$text", regex: "(*UTF8)(44)"},
-                               {"match": "44", "idx": 3, "captures": ["44"]});
-        testRegexFindAggForKey(4,
-                               {input: "$text", regex: "(*UTF)(44)"},
-                               {"match": "44", "idx": 3, "captures": ["44"]});
-
-        // When the (*UCP) option is specified, Unicode "word" characters are included in the '\w'
-        // character type.
-        testRegexFindAggForKey(12,
-                               {input: "$text", regex: String.raw `(*UCP)^(\w+)`},
-                               {"match": "༢༣༤༤༤༥12༥A", "idx": 0, "captures": ["༢༣༤༤༤༥12༥A"]});
-        // When the (*UCP) option is specified, [:digit:] becomes \p{N} and matches all Unicode
-        // decimal digit characters.
-        testRegexFindAggForKey(12,
-                               {input: "$text", regex: "(*UCP)^[[:digit:]]+"},
-                               {"match": "༢༣༤༤༤༥12༥", "idx": 0, "captures": []});
-        testRegexFindAggForKey(12, {input: "$text", regex: "(*UCP)[[:digit:]]+$"}, null);
-        // When the (*UCP) option is specified, [:alpha:] becomes \p{L} and matches all Unicode
-        // alphabetic characters.
-        assert.commandWorked(coll.insert({_id: 13, text: "박정수AB"}));
-        testRegexFindAggForKey(13,
-                               {input: "$text", regex: String.raw `(*UCP)^[[:alpha:]]+`},
-                               {"match": "박정수AB", "idx": 0, "captures": []});
-
-        // No match when options are not set.
-        testRegexFindAggForKey(12, {input: "$text", regex: String.raw `^(\w+)`}, null);
-        testRegexFindAggForKey(12, {input: "$text", regex: "^[[:digit:]]"}, null);
-        testRegexFindAggForKey(2, {input: "$text", regex: "^[[:alpha:]]+$"}, null);
-    })();
-
-    (function testErrors() {
-        coll.drop();
-        assert.commandWorked(coll.insert({text: "string"}));
-        // Null object.
-        testRegexFindAggException(null, 51103);
-        // Incorrect object parameter.
-        testRegexFindAggException("incorrect type", 51103);
-        // Test malformed regex.
-        testRegexFindAggException({input: "$text", regex: "[0-9"}, 51111);
-        // Malformed regex because start options not at the beginning.
-        testRegexFindAggException({input: "$text", regex: "^(*UCP)[[:alpha:]]+$"}, 51111);
-        testRegexFindAggException({input: "$text", regex: "((*UCP)[[:alpha:]]+$)"}, 51111);
-        // At least one of the 'input' field is not string.
-        assert.commandWorked(coll.insert({a: "string"}));
-        assert.commandWorked(coll.insert({a: {b: "object"}}));
-        testRegexFindAggException({input: "$a", regex: "valid"}, 51104);
-        // 'regex' field is not string or regex.
-        testRegexFindAggException({input: "$text", regex: ["incorrect"]}, 51105);
-        // 'options' field is not string.
-        testRegexFindAggException({input: "$text", regex: "valid", options: 123}, 51106);
-        // Incorrect 'options' flag.
-        testRegexFindAggException({input: "$text", regex: "valid", options: 'a'}, 51108);
-        // 'options' are case-sensitive.
-        testRegexFindAggException({input: "$text", regex: "valid", options: "I"}, 51108);
-        // Options specified in both 'regex' and 'options'.
-        testRegexFindAggException({input: "$text", regex: /(m(p))/i, options: "i"}, 51107);
-        testRegexFindAggException({input: "$text", regex: /(m(p))/i, options: "x"}, 51107);
-        testRegexFindAggException({input: "$text", regex: /(m(p))/m, options: ""}, 51107);
-        // 'regex' as string with null characters.
-        testRegexFindAggException({input: "$text", regex: "sasd\0", options: "i"}, 51109);
-        testRegexFindAggException({input: "$text", regex: "sa\x00sd", options: "i"}, 51109);
-        // 'options' as string with null characters.
-        testRegexFindAggException({input: "$text", regex: /(m(p))/, options: "i\0"}, 51110);
-        testRegexFindAggException({input: "$text", regex: /(m(p))/, options: "i\x00"}, 51110);
-
-    })();
-
-    (function testMultipleMatches() {
-        coll.drop();
-        assert.commandWorked(coll.insert({a: "string1"}));
-        assert.commandWorked(coll.insert({a: "string2"}));
-        // Both match.
-        testRegexFindAgg({input: "$a", regex: "(^str.*)"}, [
-            {"matches": {"match": "string1", "idx": 0, "captures": ["string1"]}},
-            {"matches": {"match": "string2", "idx": 0, "captures": ["string2"]}}
-        ]);
-        // Only one match.
-        testRegexFindAgg({input: "$a", regex: "(^.*[0-1]$)"}, [
-            {"matches": null},
-            {"matches": {"match": "string1", "idx": 0, "captures": ["string1"]}}
-        ]);
-        // None match.
-        testRegexFindAgg({input: "$a", regex: "(^.*[3-9]$)"},
-                         [{"matches": null}, {"matches": null}]);
-    })();
-
-    (function testInsideCondOperator() {
-        coll.drop();
-        assert.commandWorked(
-            coll.insert({_id: 0, level: "Public Knowledge", info: "Company Name"}));
-        assert.commandWorked(
-            coll.insert({_id: 1, level: "Private Information", info: "Company Secret"}));
-
-        const result =
-            coll.aggregate([{
-                    "$project": {
-                        "information": {
-                            "$cond": [
-                                {
-                                  "$eq":
-                                      [{"$regexFind": {input: "$level", regex: /public/i}}, null]
-                                },
-                                "REDACTED",
-                                "$info"
-                            ]
-                        }
-                    }
-                }])
-                .toArray();
-        assert.eq(result, [
-            {"_id": 0, "information": "Company Name"},
-            {"_id": 1, "information": "REDACTED"},
-        ]);
-    })();
-}());
diff --git a/src/mongo/db/pipeline/expression.cpp b/src/mongo/db/pipeline/expression.cpp
index d0d54cd7e14..f2bd565989e 100644
--- a/src/mongo/db/pipeline/expression.cpp
+++ b/src/mongo/db/pipeline/expression.cpp
@@ -5661,138 +5661,248 @@ Value ExpressionConvert::performConversion(BSONType targetType, Value inputValue
 
 namespace {
 
-Value generateRegexCapturesAndMatches(StringData pattern,
-                                      const int numCaptures,
-                                      const pcrecpp::RE_Options& options,
-                                      StringData input,
-                                      int startBytePos,
-                                      int startCodePointPos) {
-
-    const auto pcreOptions = options.all_options();
-    // The first two-thirds of the vector is used to pass back captured substrings' start and limit
-    // indexes. The remaining third of the vector is used as workspace by pcre_exec() while matching
-    // capturing subpatterns, and is not available for passing back information.
-    const size_t sizeOfOVector = (1 + numCaptures) * 3;
-    const char* compile_error;
-    int eoffset;
-
-    // The C++ interface pcreccp.h doesn't have a way to capture the matched string (or the index of
-    // the match). So we are using the C interface. First we compile all the regex options to
-    // generate pcre object, which will later be used to match against the input string.
-    pcre* pcre = pcre_compile(pattern.rawData(), pcreOptions, &compile_error, &eoffset, nullptr);
-    if (pcre == nullptr) {
-        uasserted(51111, str::stream() << "Invalid Regex: " << compile_error);
-    }
-
-    // TODO: Evaluate the upper bound for this array and fail the request if numCaptures are higher
-    // than the limit (SERVER-37848).
-    std::vector<int> outVector(sizeOfOVector);
-    const int out = pcre_exec(pcre,
-                              0,
-                              input.rawData(),
-                              input.size(),
-                              startBytePos,
-                              0,  // No need to overwrite the options set during pcre_compile.
-                              &outVector.front(),
-                              sizeOfOVector);
-    (*pcre_free)(pcre);
-    // The 'out' parameter will be zero if outVector's size is not big enough to hold all the
-    // captures, which should never be the case.
-    invariant(out != 0);
-
-    // No match.
-    if (out < 0) {
-        return Value(BSONNULL);
+class RegexMatchHandler {
+public:
+    RegexMatchHandler(const Value& inputExpr) : _pcre(nullptr), _nullish(false) {
+        _validateInputAndExtractElements(inputExpr);
+        _compile(regex_util::flags2PcreOptions(_options, false).all_options());
     }
 
-    // The first and second entires of the outVector have the start and limit indices of the matched
-    // string. as byte offsets.
-    const int matchStartByteIndex = outVector[0];
-    // We iterate through the input string's contents preceding the match index, in order to convert
-    // the byte offset to a code point offset.
-    for (int byteIx = startBytePos; byteIx < matchStartByteIndex; ++startCodePointPos) {
-        byteIx += getCodePointLength(input[byteIx]);
+    ~RegexMatchHandler() {
+        if (_pcre != nullptr) {
+            pcre_free(_pcre);
+        }
     }
-    StringData matchedStr = input.substr(outVector[0], outVector[1] - outVector[0]);
 
-    std::vector<Value> captures;
-    // The next 2 * numCaptures entries hold the start index and limit pairs, for each of the
-    // capture groups. We skip the first two elements and start iteration from 3rd element so that
-    // we only construct the strings for capture groups.
-    for (int i = 0; i < numCaptures; i++) {
-        const int start = outVector[2 * (i + 1)];
-        const int limit = outVector[2 * (i + 1) + 1];
-        captures.push_back(Value(input.substr(start, limit - start)));
-    }
+    /**
+     * Matches '_input' against the compiled pattern in '_pcre', starting at byte offset
+     * '*startBytePos'. On a match, returns a document 'Value' holding the matched string
+     * ("match"), its code point index ("idx") and an array with one entry per capture group
+     * ("captures"), where a group that did not participate in the match is null. Also advances
+     * '*startBytePos' and '*startCodePointPos' to the start of the match. If there is no match,
+     * returns a null 'Value' and leaves both positions unchanged.
+     */
+    Value nextMatch(int* startBytePos, int* startCodePointPos) {
+        invariant(startBytePos != nullptr && startCodePointPos != nullptr);
+
+        // Use input as StringData throughout the function to avoid copying the string on 'substr'
+        // calls.
+        StringData input = _input;
+        int execResult = pcre_exec(_pcre,
+                                   0,
+                                   input.rawData(),
+                                   input.size(),
+                                   *startBytePos,
+                                   0,  // No need to overwrite the options set during pcre_compile.
+                                   &_capturesBuffer.front(),
+                                   _capturesBuffer.size());
+        // No match. Any other negative PCRE result is treated the same way.
+        if (execResult < 0) {
+            return Value(BSONNULL);
+        }
+        // 'execResult' is 0 only if '_capturesBuffer' is too small (never expected). It is below
+        // '_numCaptures + 1' when the highest-numbered capture groups did not participate.
+        invariant(execResult > 0 && execResult <= _numCaptures + 1);
+
+        // The first and second entries of the '_capturesBuffer' will have the start and limit
+        // indices of the matched string, as byte offsets. '(limit - startIndex)' would be the
+        // length of the captured string.
+        const int matchStartByteIndex = _capturesBuffer[0];
+        StringData matchedStr =
+            input.substr(matchStartByteIndex, _capturesBuffer[1] - matchStartByteIndex);
+        // We iterate through the input string's contents preceding the match index, in order to
+        // convert the byte offset to a code point offset.
+        for (int byteIx = *startBytePos; byteIx < matchStartByteIndex; ++(*startCodePointPos)) {
+            byteIx += getCodePointLength(input[byteIx]);
+        }
+        // Set the start index for match to the new one.
+        *startBytePos = matchStartByteIndex;
+
+        std::vector<Value> captures;
+        captures.reserve(_numCaptures);
+        // The next '2 * _numCaptures' entries of '_capturesBuffer' hold the (start, limit) byte
+        // offsets of each capture group. PCRE sets both to -1 for a group that did not take part
+        // in the match; such a group is reported as null since it has no substring to extract.
+        for (int i = 0; i < _numCaptures; ++i) {
+            const int start = _capturesBuffer[2 * (i + 1)];
+            const int limit = _capturesBuffer[2 * (i + 1) + 1];
+            captures.push_back(start < 0 ? Value(BSONNULL)
+                                         : Value(input.substr(start, limit - start)));
+        }
 
-    MutableDocument match;
-    match.addField("match", Value(matchedStr));
-    match.addField("idx", Value(startCodePointPos));
-    match.addField("captures", Value(captures));
-    return match.freezeToValue();
-}
+        MutableDocument match;
+        match.addField("match", Value(matchedStr));
+        match.addField("idx", Value(*startCodePointPos));
+        match.addField("captures", Value(captures));
+        return match.freezeToValue();
+    }
 
-}  // namespace
+    int numCaptures() {
+        return _numCaptures;
+    }
 
-Value ExpressionRegexFind::evaluate(const Document& root) const {
+    bool nullish() {
+        return _nullish;
+    }
 
-    const Value expr = vpOperand[0]->evaluate(root);
-    uassert(51103,
-            str::stream() << "$regexFind expects an object of named arguments, but found type "
-                          << expr.getType(),
-            !expr.nullish() && expr.getType() == BSONType::Object);
-    Value textInput = expr.getDocument().getField("input");
-    Value regexPattern = expr.getDocument().getField("regex");
-    Value regexOptions = expr.getDocument().getField("options");
-
-    uassert(51104,
-            "input field should be of type string",
-            textInput.nullish() || textInput.getType() == BSONType::String);
-    uassert(51105,
-            "regex field should be of type string or regex",
-            regexPattern.nullish() || regexPattern.getType() == BSONType::String ||
-                regexPattern.getType() == BSONType::RegEx);
-    uassert(51106,
-            "options should be of type string",
-            regexOptions.nullish() || regexOptions.getType() == BSONType::String);
-    if (textInput.nullish() || regexPattern.nullish()) {
-        return Value(BSONNULL);
+    StringData getInput() {
+        return _input;
     }
 
-    StringData pattern, optionFlags;
-    // The 'regex' field can be a RegEx object with its own options/options specified separately...
-    if (regexPattern.getType() == BSONType::RegEx) {
-        StringData regexFlags = regexPattern.getRegexFlags();
-        pattern = regexPattern.getRegex();
-        uassert(
-            51107,
-            str::stream() << "Found regex option(s) specified in both 'regex' and 'option' fields",
-            regexOptions.nullish() || regexFlags.empty());
-        optionFlags = regexOptions.nullish() ? regexFlags : regexOptions.getStringData();
-    } else {
-        // ... or it can be a string field with options specified separately.
-        pattern = regexPattern.getStringData();
+private:
+    RegexMatchHandler(const RegexMatchHandler&) = delete;
+
+    void _compile(const int pcreOptions) {
+        const char* compile_error = nullptr;
+        int eoffset = 0;
+        // Compiles '_pattern' into '_pcre' and sizes '_capturesBuffer' to fit its captures.
+        // The C interface is used because the C++ interface pcrecpp.h has no way to capture the
+        // matched string (or the index of the match). Throws error 51111 if the pattern is
+        // invalid.
+        _pcre = pcre_compile(_pattern.c_str(), pcreOptions, &compile_error, &eoffset, nullptr);
+        if (_pcre == nullptr) {
+            uasserted(51111, str::stream() << "Invalid Regex: " << compile_error);
+        }
+
+        // Calculate the number of capture groups present in '_pattern' and store in '_numCaptures'.
+        int pcre_retval = pcre_fullinfo(_pcre, nullptr, PCRE_INFO_CAPTURECOUNT, &_numCaptures);
+        invariant(pcre_retval == 0);
+
+        // The first two-thirds of the vector is used to pass back captured substrings' start and
+        // limit indexes. The remaining third of the vector is used as workspace by pcre_exec()
+        // while matching capturing subpatterns, and is not available for passing back information.
+        // TODO: Evaluate the upper bound for this array and fail the request if numCaptures are
+        // higher than the limit (SERVER-37848).
+        _capturesBuffer = std::vector<int>((1 + _numCaptures) * 3);
+    }
+
+    void _validateInputAndExtractElements(const Value& inputExpr) {
+        uassert(51103,
+                str::stream() << "$regexFind expects an object of named arguments, but found type "
+                              << inputExpr.getType(),
+                inputExpr.getType() == BSONType::Object);
+        Value textInput = inputExpr.getDocument().getField("input");
+        Value regexPattern = inputExpr.getDocument().getField("regex");
+        Value regexOptions = inputExpr.getDocument().getField("options");
+
+        uassert(51104,
+                "'input' field should be of type string",
+                textInput.nullish() || textInput.getType() == BSONType::String);
+        uassert(51105,
+                "'regex' field should be of type string or regex",
+                regexPattern.nullish() || regexPattern.getType() == BSONType::String ||
+                    regexPattern.getType() == BSONType::RegEx);
+        uassert(51106,
+                "'options' should be of type string",
+                regexOptions.nullish() || regexOptions.getType() == BSONType::String);
+
+        // A nullish 'input' or 'regex' makes the operation as a whole nullish. The flag is only
+        // recorded here; validation of the remaining fields still runs below.
+        _nullish = textInput.nullish() || regexPattern.nullish();
+
+        if (textInput.getType() == BSONType::String) {
+            _input = textInput.getString();
+        }
+
+        // The 'regex' field can be a RegEx object, which may carry its own option flags...
+        if (regexPattern.getType() == BSONType::RegEx) {
+            StringData regexFlags = regexPattern.getRegexFlags();
+            _pattern = regexPattern.getRegex();
+            uassert(51107,
+                    str::stream()
+                        << "Found regex option(s) specified in both 'regex' and 'option' fields",
+                    regexOptions.nullish() || regexFlags.empty());
+            if (!regexFlags.empty()) {
+                _options = regexFlags.toString();
+            }
+        } else if (regexPattern.getType() == BSONType::String) {
+            // ...or it can be a plain string, with any options supplied in the 'options' field.
+            _pattern = regexPattern.getString();
+        }
+        // A non-null 'options' is always extracted so that it gets validated, even when
+        // 'regexPattern' is nullish.
         if (!regexOptions.nullish()) {
-            optionFlags = regexOptions.getStringData();
+            _options = regexOptions.getString();
         }
-    }
-    uassert(51109,
-            "Regular expression cannot contain an embedded null byte",
-            pattern.find('\0', 0) == string::npos);
-    uassert(51110,
-            "Regular expression options string cannot contain an embedded null byte",
-            optionFlags.find('\0', 0) == string::npos);
+        uassert(51109,
+                "Regular expression cannot contain an embedded null byte",
+                _pattern.find('\0', 0) == string::npos);
+        uassert(51110,
+                "Regular expression options string cannot contain an embedded null byte",
+                _options.find('\0', 0) == string::npos);
+    }
+
+    pcre* _pcre;
+    // Number of capture groups present in '_pattern'.
+    int _numCaptures;
+    // Holds the start and limit indices of match and captures for the current match.
+    std::vector<int> _capturesBuffer;
+    std::string _input;
+    std::string _pattern;
+    std::string _options;
+    bool _nullish;
+};
 
-    pcrecpp::RE_Options opt = regex_util::flags2PcreOptions(optionFlags, false);
-    pcrecpp::RE regex(pattern.rawData(), opt);
-    return generateRegexCapturesAndMatches(
-        pattern, regex.NumberOfCapturingGroups(), opt, textInput.getStringData(), 0, 0);
-}
+}  // namespace
 
+Value ExpressionRegexFind::evaluate(const Document& root) const {
+    // Parse and validate the named arguments; a nullish 'input' or 'regex' yields null.
+    RegexMatchHandler matcher(vpOperand[0]->evaluate(root));
+    if (!matcher.nullish()) {
+        int bytePos = 0, codePointPos = 0;
+        return matcher.nextMatch(&bytePos, &codePointPos);
+    }
+    return Value(BSONNULL);
+}
 
 REGISTER_EXPRESSION(regexFind, ExpressionRegexFind::parse);
 const char* ExpressionRegexFind::getOpName() const {
     return "$regexFind";
 }
 
+Value ExpressionRegexFindAll::evaluate(const Document& root) const {
+    // Returns an array holding every non-overlapping match; it is empty when nothing matches.
+    std::vector<Value> output;
+    RegexMatchHandler regex(vpOperand[0]->evaluate(root));
+    if (regex.nullish()) {
+        return Value(output);
+    }
+    int startByteIndex = 0, startCodePointIndex = 0;
+    StringData input = regex.getInput();
+
+    // A do...while is used so that an empty 'input' is still tried once against the regex.
+    do {
+        auto matchObj = regex.nextMatch(&startByteIndex, &startCodePointIndex);
+        if (matchObj.getType() == BSONType::jstNULL) {
+            break;
+        }
+        output.push_back(matchObj);
+        std::string matchStr = matchObj.getDocument().getField("match").getString();
+        if (matchStr.empty()) {
+            // The regex matched an empty string at 'startByteIndex'. Nothing else can be reported
+            // at this index, so step over one code point. An empty match at the very end of
+            // 'input' ends the search, as there is no byte left to step over.
+            if (static_cast<size_t>(startByteIndex) >= input.size()) {
+                break;
+            }
+            startByteIndex += getCodePointLength(input[startByteIndex]);
+            ++startCodePointIndex;
+            continue;
+        }
+        // Matches must not overlap, so resume right after 'matchStr' (bytes and code points).
+        startByteIndex += matchStr.size();
+        for (size_t byteIx = 0; byteIx < matchStr.size(); ++startCodePointIndex) {
+            byteIx += getCodePointLength(matchStr[byteIx]);
+        }
+        invariant(startByteIndex > 0 && startCodePointIndex > 0 &&
+                  startCodePointIndex <= startByteIndex);
+    } while (static_cast<size_t>(startByteIndex) < input.size());
+    return Value(output);
+}
+
+REGISTER_EXPRESSION(regexFindAll, ExpressionRegexFindAll::parse);
+const char* ExpressionRegexFindAll::getOpName() const {
+    return "$regexFindAll";
+}
+
 }  // namespace mongo
diff --git a/src/mongo/db/pipeline/expression.h b/src/mongo/db/pipeline/expression.h
index 4aa91a67086..b0949cca3fc 100644
--- a/src/mongo/db/pipeline/expression.h
+++ b/src/mongo/db/pipeline/expression.h
@@ -2093,7 +2093,7 @@ private:
     boost::intrusive_ptr<Expression> _onNull;
 };
 
-class ExpressionRegexFind final : public ExpressionFixedArity<ExpressionRegexFind, 1> {
+class ExpressionRegexFind : public ExpressionFixedArity<ExpressionRegexFind, 1> {
 public:
     explicit ExpressionRegexFind(const boost::intrusive_ptr<ExpressionContext>& expCtx)
         : ExpressionFixedArity<ExpressionRegexFind, 1>(expCtx) {}
@@ -2101,4 +2101,13 @@ public:
     Value evaluate(const Document& root) const final;
     const char* getOpName() const final;
 };
+
+class ExpressionRegexFindAll final : public ExpressionFixedArity<ExpressionRegexFindAll, 1> {
+public:
+    explicit ExpressionRegexFindAll(const boost::intrusive_ptr<ExpressionContext>& expCtx)
+        : ExpressionFixedArity<ExpressionRegexFindAll, 1>(expCtx) {}
+
+    Value evaluate(const Document& root) const final;
+    const char* getOpName() const final;
+};
 }
diff --git a/src/mongo/db/pipeline/expression_test.cpp b/src/mongo/db/pipeline/expression_test.cpp
index ca254af56c7..c9bc46a2c8b 100644
--- a/src/mongo/db/pipeline/expression_test.cpp
+++ b/src/mongo/db/pipeline/expression_test.cpp
@@ -5950,7 +5950,7 @@ TEST(GetComputedPathsTest, ExpressionMapNotConsideredRenameWithDottedInputPath)
 
 }  // namespace GetComputedPathsTest
 
-namespace ExpressionRegexFindTest {
+namespace ExpressionRegexTest {
 
 TEST(ExpressionRegexFindTest, BasicTest) {
     Value input(fromjson("{input: 'asdf', regex: '^as' }"));
@@ -5979,11 +5979,64 @@ TEST(ExpressionRegexFindTest, FailureCase) {
     intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
     ExpressionRegexFind regexF(expCtx);
     regexF.addOperand(ExpressionConstant::create(expCtx, input));
-    ASSERT_THROWS(regexF.evaluate(Document()), DBException);
+    ASSERT_THROWS_CODE(regexF.evaluate(Document()), DBException, 51105);
 }
 
+TEST(ExpressionRegexFindAllTest, MultipleMatches) {
+    Value input(fromjson("{input: 'a1b2c3', regex: '([a-c][1-3])' }"));
+    std::vector<Value> expectedOut = {Value(fromjson("{match: 'a1', idx:0, captures:['a1']}")),
+                                      Value(fromjson("{match: 'b2', idx:2, captures:['b2']}")),
+                                      Value(fromjson("{match: 'c3', idx:4, captures:['c3']}"))};
+    intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
+    ExpressionRegexFindAll regexF(expCtx);
+    regexF.addOperand(ExpressionConstant::create(expCtx, input));
+    Value output = regexF.evaluate(Document());
+    ASSERT_VALUE_EQ(output, Value(expectedOut));
+}
+
+TEST(ExpressionRegexFindAllTest, NoMatch) {
+    Value input(fromjson("{input: 'a1b2c3', regex: 'ab' }"));
+    intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
+    ExpressionRegexFindAll regexF(expCtx);
+    regexF.addOperand(ExpressionConstant::create(expCtx, input));
+    Value output = regexF.evaluate(Document());
+    ASSERT_VALUE_EQ(output, Value(std::vector<Value>()));
+}
+
+TEST(ExpressionRegexFindAllTest, FailureCase) {
+    Value input(fromjson("{input: 'FirstLine\\nSecondLine', regex: '[0-9'}"));
+    intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
+    ExpressionRegexFindAll regexF(expCtx);
+    regexF.addOperand(ExpressionConstant::create(expCtx, input));
+    ASSERT_THROWS_CODE(regexF.evaluate(Document()), DBException, 51111);
+}
+
+TEST(ExpressionRegexFindAllTest, InvalidUTF8InInput) {
+    std::string inputField = "1234 ";
+    // Append 0xE5, a UTF-8 lead byte; it is invalid here as no continuation byte follows it.
+    inputField += static_cast<char>(0xE5);
+    inputField += "  1234";
+    Value input(fromjson("{input: '" + inputField + "', regex: '[0-9]'}"));
+    intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
+    ExpressionRegexFindAll regexF(expCtx);
+    regexF.addOperand(ExpressionConstant::create(expCtx, input));
+    // Verify that an input containing an invalid UTF-8 byte yields no matches (an empty array).
+    ASSERT_VALUE_EQ(regexF.evaluate(Document()), Value(std::vector<Value>()));
+}
+
+TEST(ExpressionRegexFindAllTest, InvalidUTF8InRegex) {
+    std::string regexField = "1234 ";
+    // Append 0xE5, a UTF-8 lead byte; it is invalid here as no continuation byte follows it.
+    regexField += static_cast<char>(0xE5);
+    Value input(fromjson("{input: '123456', regex: '" + regexField + "'}"));
+    intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
+    ExpressionRegexFindAll regexF(expCtx);
+    regexF.addOperand(ExpressionConstant::create(expCtx, input));
+    // Verify that a regex which is not valid UTF-8 is rejected with error code 51111.
+    ASSERT_THROWS_CODE(regexF.evaluate(Document()), DBException, 51111);
+}
 
-}  // namespace ExpressionRegexFindTest
+}  // namespace ExpressionRegexTest
 
 class All : public Suite {
 public: