summaryrefslogtreecommitdiff
path: root/jstests/core/index_bounds_pipe.js
blob: ee6cbd5b5f75b6b75843a5f922f30b3a65de640b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
/**
 * Tests the tightness of index bounds when attempting to match a regex that contains escaped and
 * non-escaped pipe '|' characters.
 */
(function() {
    'use strict';

    load('jstests/libs/analyze_plan.js');

    // Name of the collection under test; drop any existing copy before seeding it.
    const collName = 'index_bounds_pipe';
    const coll = db.getCollection(collName);
    coll.drop();

    // Seed documents whose string _ids cover the empty string, plain letters, and pipe
    // characters with and without a preceding backslash. Insertion order is preserved.
    ['', '\\|', 'a', 'a|b', 'b', '|'].forEach((id) => {
        assert.writeOK(coll.insert({_id: id}));
    });

    /**
     * Asserts that a query on a field using 'params.regex' uses index bounds 'params.bounds' and
     * returns results identical to 'params.results'.
     *
     * Also tests that a query using 'params.regex' will return documents with a field of type regex
     * with an identical regular expression value.
     */
    function assertIndexBoundsAndResult(params) {
        // 'params' supplies three fields: 'regex' (the pattern to query _id with), 'bounds' (the
        // expected _id index bounds) and 'results' (the expected first batch of documents).
        const filter = {_id: params.regex};
        const findCmd = {find: collName, filter: filter, projection: {_id: 1}, sort: {_id: 1}};

        // Step 1: explain the find command and verify the index bounds of every IXSCAN stage.
        // Against a sharded cluster the plan may contain several index scans; each one is
        // expected to report the same bounds.
        const explainRes = db.runCommand({explain: findCmd});
        assert.commandWorked(explainRes);

        const indexScans = getPlanStages(explainRes.queryPlanner.winningPlan, 'IXSCAN');
        assert.gt(indexScans.length,
                  0,
                  'Plan unexpectedly missing IXSCAN stage: ' + tojson(explainRes));
        indexScans.forEach((stage, i) => {
            const actualBounds = stage.indexBounds._id;
            assert.eq(actualBounds,
                      params.bounds,
                      `Expected bounds of ${tojson(params.bounds)} but got ${
                          tojson(actualBounds)}. i=${i}, all output: ${tojson(explainRes)}`);
        });

        // Step 2: actually run the find and compare the returned documents with the expectation.
        const findRes = db.runCommand(findCmd);
        assert.commandWorked(findRes);
        assert.eq(findRes.cursor.firstBatch,
                  params.results,
                  'Regex query ' + tojson(filter) + ' returned incorrect results');

        // Step 3: a regex query must also match a stored value that is the very same regular
        // expression. This uses a separate, freshly dropped collection whose name is the base
        // collection name with the regex appended, indexed on 'x'.
        const regexColl = db.getCollection(collName + params.regex);
        regexColl.drop();
        assert.commandWorked(regexColl.createIndex({x: 1}));

        const regexDoc = {_id: 0, x: params.regex};
        assert.writeOK(regexColl.insert(regexDoc));

        const regexFilter = {x: params.regex};
        const matched = regexColl.findOne(regexFilter);
        const mismatchMsg = 'Regex query ' + tojson(regexFilter) +
            ' did not match document with identical regex value';
        assert.eq(matched, regexDoc, mismatchMsg);
    }

    // Loose string bounds expected whenever the regex cannot use tight index bounds.
    const unboundedStringRange = '["", {})';

    // Table of regexes with their expected index bounds and query results. Entries are checked
    // one at a time, in the order listed.
    const regexCases = [
        // An anchored regex that uses no special operators can use tight index bounds.
        {
            regex: /^a/,
            bounds: ['["a", "b")', '[/^a/, /^a/]'],
            results: [{_id: 'a'}, {_id: 'a|b'}]
        },
        {
            regex: /^\\/,
            bounds: ['["\\", "]")', '[/^\\\\/, /^\\\\/]'],
            results: [{_id: '\\|'}]
        },

        // An anchored regex using the alternation operator cannot use tight index bounds.
        {
            regex: /^a|b/,
            bounds: [unboundedStringRange, '[/^a|b/, /^a|b/]'],
            results: [{_id: 'a'}, {_id: 'a|b'}, {_id: 'b'}]
        },

        // An anchored regex that uses an escaped pipe character can use tight index bounds.
        {
            regex: /^a\|/,
            bounds: ['["a|", "a}")', '[/^a\\|/, /^a\\|/]'],
            results: [{_id: 'a|b'}]
        },
        {
            regex: /^\|/,
            bounds: ['["|", "}")', '[/^\\|/, /^\\|/]'],
            results: [{_id: '|'}]
        },

        // A pipe character that is preceded by an escaped backslash is correctly interpreted as
        // the alternation operator and cannot use tight index bounds.
        {
            regex: /^\\|b/,
            bounds: [unboundedStringRange, '[/^\\\\|b/, /^\\\\|b/]'],
            results: [{_id: '\\|'}, {_id: 'a|b'}, {_id: 'b'}]
        },
        {
            regex: /^\\|^b/,
            bounds: [unboundedStringRange, '[/^\\\\|^b/, /^\\\\|^b/]'],
            results: [{_id: '\\|'}, {_id: 'b'}]
        },

        // An escaped backslash immediately followed by an escaped pipe does not use tight index
        // bounds.
        {
            regex: /^\\\|/,
            bounds: [unboundedStringRange, '[/^\\\\\\|/, /^\\\\\\|/]'],
            results: [{_id: '\\|'}]
        },

        // A pipe escaped with the \Q...\E escape sequence does not use tight index bounds.
        {
            regex: /^\Q|\E/,
            bounds: [unboundedStringRange, '[/^\\Q|\\E/, /^\\Q|\\E/]'],
            results: [{_id: '|'}]
        },

        // An escaped pipe within \Q...\E can use tight index bounds.
        {
            regex: /^\Q\|\E/,
            bounds: ['["\\|", "\\}")', '[/^\\Q\\|\\E/, /^\\Q\\|\\E/]'],
            results: [{_id: '\\|'}]
        }
    ];

    regexCases.forEach((regexCase) => {
        assertIndexBoundsAndResult(regexCase);
    });
}());