summaryrefslogtreecommitdiff
path: root/jstests/noPassthrough/analyze_with_sampling.js
blob: a863c80d9dfe2951de350d01554e10afd3852e1c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
/**
 * Exercises the 'analyze' command with the 'sampleRate' and 'sampleSize' options on a mongod
 * started with featureFlagCommonQueryFramework enabled. The statistics written for key "x" are read
 * back from the collection's system.statistics namespace and checked after each run.
 */
(function() {
"use strict";

const conn = MongoRunner.runMongod({setParameter: {featureFlagCommonQueryFramework: true}});
assert.neq(null, conn, "mongod was unable to start up");

const db = conn.getDB(jsTestName());
const coll = db.cqf_test_analyze_sample;
const statscoll = db.system.statistics.cqf_test_analyze_sample;

// Runs 'analyze' on key "x" of the test collection, optionally with extra sampling options, and
// asserts that the command succeeded.
const runAnalyze = (samplingOptions = {}) => {
    const cmd = Object.assign({analyze: coll.getName(), key: "x"}, samplingOptions);
    assert.commandWorked(db.runCommand(cmd));
};

// Returns the statistics document currently stored for key "x" (undefined if there is none).
const fetchStats = () => {
    const [statsDoc] = statscoll.find({_id: "x"}).toArray();
    return statsDoc;
};

// Start from empty data and statistics collections.
coll.drop();
statscoll.drop();

// Populate the collection with 10000 documents of the form {_id: i, x: i}.
const docs = Array.from({length: 10000}, (unused, i) => ({_id: i, x: i}));
assert.commandWorked(coll.insertMany(docs));

// Baseline: analyze without any sampling option.
runAnalyze();
const fullStats = fetchStats();
const fullBounds = fullStats.statistics.scalarHistogram.bounds;

// Analyze again, this time with a 1% sample rate.
runAnalyze({sampleRate: 0.01});
const sampleStats = fetchStats();
const sampleBounds = sampleStats.statistics.scalarHistogram.bounds;

// Differing histogram bounds serve as a proxy for verifying that sampling occurred.
assert.neq(fullBounds, sampleBounds);
assert.eq(0.01, sampleStats.statistics.sampleRate);
// About 100 documents should have been sampled. Sampling is not deterministic, so the accepted
// range is wide enough to avoid flakiness while still showing that the whole collection was not
// analyzed.
assert.betweenIn(50, sampleStats.statistics.documents, 150);

// sampleSize: 1000 out of 10000 documents is reported as a sample rate of 0.1.
runAnalyze({sampleSize: 1000});
const sampleSizeStats = fetchStats();
assert.eq(0.1, sampleSizeStats.statistics.sampleRate);

// A sampleSize larger than the collection is reported as a sample rate of 1.0.
runAnalyze({sampleSize: 100000000});
const sampleSizeFullStats = fetchStats();
assert.eq(1.0, sampleSizeFullStats.statistics.sampleRate);

// Both sampling options must also succeed on an empty collection.
assert.commandWorked(coll.deleteMany({}));
runAnalyze({sampleRate: 0.5});
runAnalyze({sampleSize: 1000});

MongoRunner.stopMongod(conn);
}());