// jstests/libs/ce_stats_utils.js

load('jstests/aggregation/extras/utils.js');  // For assertArrayEq.
load("jstests/libs/optimizer_utils.js");      // For checkCascadesOptimizerEnabled.

/**
 * Builds a simplified skeleton of the physical plan found in 'explain'. The skeleton is produced
 * by getPlanSkeleton() from the explain's root node, called with extraKeepKeys set to "interval"
 * and "properties" and with printLogicalCE enabled.
 */
function summarizeExplainForCE(explain) {
    const skeletonOptions = {
        extraKeepKeys: ["interval", "properties"],
        printLogicalCE: true,
    };
    const rootNode = navigateToRootNode(explain);
    return getPlanSkeleton(rootNode, skeletonOptions);
}

/**
 * Returns the logical cardinality estimate attached to the root node of 'explain'. Asserts that
 * a root node was found and that its nodeType is "Root"; the offending document is included in the
 * assertion message.
 */
function getRootCE(explain) {
    const root = navigateToRootNode(explain);
    assert.neq(root, null, tojson(explain));

    const {nodeType} = root;
    assert.eq(nodeType, "Root", tojson(root));

    return extractLogicalCEFromNode(root);
}

/**
 * Asserts that 'expected' and 'actual' differ by strictly less than 'tolerance' (0.01 unless
 * overridden). 'msg' is reported when the assertion fails.
 */
function assertApproxEq(expected, actual, msg, tolerance = 0.01) {
    const difference = Math.abs(expected - actual);
    const withinTolerance = difference < tolerance;
    assert(withinTolerance, msg);
}

/**
 * Validates that the results and the root node's cardinality estimate for a given $match predicate
 * agree. If the 'ce' parameter is omitted (undefined or null), no explicit estimate is forwarded
 * and verifyCEForMatchNodes() falls back to the number of documents in 'expected'. An explicit
 * estimate of 0 is honoured rather than being treated as omitted.
 *
 * Returns whatever verifyCEForMatchNodes() returns.
 */
function verifyCEForMatch({coll, predicate, expected, ce, hint}) {
    // Check for null/undefined instead of truthiness so that an expected estimate of 0 is not
    // mistaken for "no estimate given".
    const CEs = ce == null ? undefined : [ce];
    return verifyCEForMatchNodes(
        {coll, predicate, expected, getNodeCEs: (explain) => [getRootCE(explain)], CEs, hint});
}

/**
 * Validates that the results and cardinality estimates for a given $match predicate agree.
 *
 * Runs [{$match: predicate}] against 'coll' (passing 'hint' as an aggregate option when given),
 * checks the returned documents against 'expected' with assertArrayEq, then explains the same
 * pipeline and logs a summary of the plan.
 *
 * The caller should specify a function 'getNodeCEs' which takes explain output as an input, and
 * returns the cardinality estimates of the nodes the caller wants to verify in an array. The
 * expected estimates should be defined in 'CEs' (same length and order as the array returned by
 * 'getNodeCEs'); if 'CEs' is omitted it defaults to a single estimate equal to the number of
 * documents in 'expected'. Each pair is compared with assertApproxEq.
 */
function verifyCEForMatchNodes({coll, predicate, expected, getNodeCEs, CEs, hint}) {
    // Run aggregation & verify query results.
    const options = hint ? {hint} : {};
    const actual = coll.aggregate([{$match: predicate}], options).toArray();
    assertArrayEq({actual, expected});

    // Obtain explain and log its summarized form to ease debugging of failures.
    const explain = coll.explain().aggregate([{$match: predicate}], options);
    const explainSummarized = tojson(summarizeExplainForCE(explain));
    jsTestLog(explainSummarized);

    // Verify expected vs. actual CE.
    const actualCEs = getNodeCEs(explain);
    const expectedCEs = CEs == undefined ? [expected.length] : CEs;
    assert.eq(actualCEs.length,
              expectedCEs.length,
              `${tojson(predicate)} expected ${expectedCEs.length} node estimate(s), got ${
                  actualCEs.length}.`);
    for (let i = 0; i < actualCEs.length; i++) {
        const actualCE = actualCEs[i];
        const expectedCE = expectedCEs[i];
        // Argument order follows assertApproxEq's (expected, actual, msg) signature.
        assertApproxEq(expectedCE,
                       actualCE,
                       `${tojson(predicate)} node ${i} should have been estimated as ${
                           expectedCE}, estimated ${actualCE} instead.`);
    }
}

/**
 * Creates a histogram for "coll" on the field named by expectedHistogram._id and validates the
 * stored result. Any existing statistics collection for "coll" is dropped first, then the analyze
 * command is run on that field. Finally the statistics entry for the field is compared with
 * 'expectedHistogram', or with an empty result set when 'empty' is true.
 *
 * Note that this sets internalQueryFrameworkControl to "tryBonsai" and does not restore it.
 */
function createAndValidateHistogram({coll, expectedHistogram, empty = false}) {
    const {_id: field} = expectedHistogram;
    const statsColl = db.system.statistics[coll.getName()];
    statsColl.drop();

    // We can't use forceBonsai here because the new optimizer doesn't know how to handle the
    // analyze command.
    const frameworkRes =
        db.adminCommand({setParameter: 1, internalQueryFrameworkControl: "tryBonsai"});
    assert.commandWorked(frameworkRes);

    // Set up histogram for test collection.
    assert.commandWorked(db.runCommand({analyze: coll.getName(), key: field}));

    // Validate histograms.
    let expected = [expectedHistogram];
    if (empty) {
        expected = [];
    }
    const actual = statsColl.aggregate({$match: {_id: field}}).toArray();
    assertArrayEq({actual, expected});
}

/**
 * Useful boilerplate code for tests that need to use the analyze command and/or histogram
 * estimation. If checkCascadesOptimizerEnabled(db) is false the test is skipped. Otherwise the
 * current values of the internalQueryCardinalityEstimatorMode and internalQueryFrameworkControl
 * query knobs are recorded, 'test' is invoked with no arguments, and both knobs are restored to
 * the recorded values afterwards, even if 'test' throws.
 */
function runHistogramsTest(test) {
    if (!checkCascadesOptimizerEnabled(db)) {
        jsTestLog("Skipping test because the optimizer is not enabled");
        return;
    }

    // We will be updating some query knobs, so store the old state and restore it after the test.
    const originalKnobs = db.adminCommand({
        getParameter: 1,
        internalQueryCardinalityEstimatorMode: 1,
        internalQueryFrameworkControl: 1,
    });
    // Fail here if the knobs could not be read, rather than running the test and then attempting
    // to restore undefined values.
    assert.commandWorked(originalKnobs);
    const {internalQueryCardinalityEstimatorMode, internalQueryFrameworkControl} = originalKnobs;

    try {
        test();
    } finally {
        // Reset query knobs to their original state.
        assert.commandWorked(db.adminCommand({
            setParameter: 1,
            internalQueryCardinalityEstimatorMode,
            internalQueryFrameworkControl
        }));
    }
}

/**
 * Switches the server to histogram-based cardinality estimation under the new optimizer: sets
 * internalQueryCardinalityEstimatorMode to "histogram" and then internalQueryFrameworkControl to
 * "forceBonsai", asserting that each setParameter command succeeds.
 */
function forceHistogramCE() {
    const knobs = [
        ["internalQueryCardinalityEstimatorMode", "histogram"],
        ["internalQueryFrameworkControl", "forceBonsai"],
    ];
    // Each knob is sent as its own setParameter command, in the order listed above.
    for (const [name, value] of knobs) {
        assert.commandWorked(db.adminCommand({setParameter: 1, [name]: value}));
    }
}