summaryrefslogtreecommitdiff
path: root/jstests/aggregation/expressions/regex_limits.js
blob: eceaede1b8b77f412e241002c3b065f24dda43bc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
/*
 * Tests to validate limits for $regexFind, $regexFindAll and $regexMatch aggregation expressions.
 */
(function() {
'use strict';

load("jstests/aggregation/extras/utils.js");  // For assertErrorCode().

// Start from an empty collection that holds exactly one document. Its 'z' field is a
// 100001-character string: 50000 'c's, followed by 50000 'd's, followed by a single 'e'.
const coll = db.regex_expr_limit;
coll.drop();
const kRunLength = 50000;
const longInput = "c".repeat(kRunLength) + "d".repeat(kRunLength) + "e";
assert.commandWorked(coll.insert({z: longInput}));

/**
 * Evaluates $regexFindAll, $regexFind and $regexMatch (in that order) over 'coll' with the same
 * argument object, asserting after each one that it produced the result implied by
 * 'expectedOutputForFindAll'.
 *
 * @param {Object} inputObj - argument document handed unchanged to each of the three expressions.
 * @param {Array} expectedOutputForFindAll - expected output of $regexFindAll. $regexFind is
 *     expected to produce its first element (null when it is empty), and $regexMatch is expected
 *     to produce whether it is non-empty.
 */
function testRegexAgg(inputObj, expectedOutputForFindAll) {
    // Runs a single-stage $project that exposes only the result of the given regex expression.
    const evaluate = function(expression) {
        const projection = {_id: 0, "matches": {[expression]: inputObj}};
        return coll.aggregate([{"$project": projection}]).toArray();
    };

    const findAllResult = evaluate("$regexFindAll");
    assert.eq(findAllResult, [{"matches": expectedOutputForFindAll}]);

    const findResult = evaluate("$regexFind");
    const hasMatch = expectedOutputForFindAll.length != 0;
    const firstMatch = hasMatch ? expectedOutputForFindAll[0] : null;
    assert.eq(findResult, [{"matches": firstMatch}]);

    const matchResult = evaluate("$regexMatch");
    assert.eq(matchResult, [{"matches": hasMatch}]);
}

/**
 * Asserts, via assertErrorCode(), that projecting a regex expression applied to 'inputObj' over
 * 'coll' fails with 'exceptionCode'.
 *
 * @param {Object} inputObj - argument document handed to the regex expression(s).
 * @param {number} exceptionCode - expected error code, forwarded to assertErrorCode().
 * @param {string} [expression] - when given, only this expression is checked. When omitted (or
 *     null), $regexFindAll, $regexFind and $regexMatch are each checked, in that order.
 */
function testRegexAggException(inputObj, exceptionCode, expression) {
    // The loose comparison is deliberate: both undefined and null mean "check every expression".
    const expressionsUnderTest = expression != undefined
        ? [expression]
        : ["$regexFindAll", "$regexFind", "$regexMatch"];

    for (const name of expressionsUnderTest) {
        assertErrorCode(coll, [{"$project": {"matches": {[name]: inputObj}}}], exceptionCode);
    }
}

(function testLongRegex() {
    // PCRE does not cap the length of the regex string directly. Instead it fails once the
    // internal memory used during compilation reaches 64KB. For a pattern without capture
    // groups that works out to a maximum of 32764 characters.
    // Reference : https://www.pcre.org/original/doc/html/pcrelimits.html
    const kMaxRegexPatternLen = 32764;
    const longestValidPattern = "c".repeat(kMaxRegexPatternLen);

    // A pattern of exactly the maximum allowable length still compiles and finds the document.
    const expectedMatch = {match: longestValidPattern, "idx": 0, "captures": []};
    testRegexAgg({input: "$z", regex: longestValidPattern}, [expectedMatch]);

    // A single extra character pushes the pattern over the limit, so every expression fails.
    const firstInvalidPattern = "c".repeat(kMaxRegexPatternLen + 1);
    testRegexAggException({input: "$z", regex: firstInvalidPattern}, 51111);
})();

(function testBufferOverflow() {
    // With an empty pattern, $regexFindAll matches at every character individually, so an input
    // of 'n' characters results in 'n' separate matches. If the pattern additionally contains
    // 'k' capture groups, the output document ends up holding 'n * k' capture sub-strings.
    //
    // Evaluation stops with an error as soon as the intermediate document grows past 64MB.
    const kCaptureGroups = 100;
    const emptyMatchPatterns = [
        // 100 empty capture groups nested inside one another.
        "(".repeat(kCaptureGroups) + ")".repeat(kCaptureGroups),
        // 100 empty capture groups placed one after another.
        "()".repeat(kCaptureGroups)
    ];

    for (const pattern of emptyMatchPatterns) {
        testRegexAggException({input: "$z", regex: pattern}, 51151, "$regexFindAll");
    }
})();

(function testNumberOfCaptureGroupLimit() {
    // PCRE itself allows far more captures (65535), but the limit that is hit first is the 64KB
    // of internal memory it may use while compiling. Capture groups make PCRE use much more
    // memory, so as their number grows the maximum regex length shrinks by a factor of about 4.
    const kCaptureGroup = "(d)";
    const approxAllowedCaptureGroups = 3999;

    // 3999 single-character groups followed by 'e': this compiles and executes successfully.
    const matchingPattern = kCaptureGroup.repeat(approxAllowedCaptureGroups) + "e";
    const expectedMatch = {
        match: "d".repeat(approxAllowedCaptureGroups) + "e",
        // The match covers the last 3999 'd's plus the final 'e' of the 100001-character input,
        // so it starts at 100000 - 3999.
        "idx": 96001,
        "captures": Array.from({length: approxAllowedCaptureGroups}, () => 'd')
    };
    testRegexAgg({input: "$z", regex: matchingPattern}, [expectedMatch]);

    // With this many additional captures the pattern still compiles, but during execution PCRE
    // hits PCRE_ERROR_RECURSIONLIMIT and returns an error.
    const bufferExecutionFailure = 2553;
    const executionFailurePattern = kCaptureGroup.repeat(bufferExecutionFailure) + matchingPattern;
    testRegexAggException({input: "$z", regex: executionFailurePattern}, 51156);

    // One further capture group tips the pattern over the maximum regex length limit, so PCRE
    // already fails while attempting to compile it.
    const compileFailurePattern = kCaptureGroup + executionFailurePattern;
    testRegexAggException({input: "$z", regex: compileFailurePattern}, 51111);
})();

(function testMaxCaptureDepth() {
    const kMaxCaptureDepthLen = 250;

    // Wraps 'inner' in 'depth' levels of nested capture groups: '(((...inner...)))'.
    const nest = function(inner, depth) {
        return "(".repeat(depth) + inner + ")".repeat(depth);
    };

    // At a nesting depth of 250 the pattern is accepted. Every one of the nested groups captures
    // the single 'e' at the end of the input.
    const patternMaxDepth = nest("e", kMaxCaptureDepthLen);
    const expectedMatch = {
        match: "e",
        "idx": 100000,
        "captures": new Array(kMaxCaptureDepthLen).fill('e')
    };
    testRegexAgg({input: "$z", regex: patternMaxDepth}, [expectedMatch]);

    // One more level of nesting is rejected by the regex expressions with an error.
    const patternTooDeep = nest(patternMaxDepth, 1);
    testRegexAggException({input: "$z", regex: patternTooDeep}, 51111);
})();
})();