1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
|
/**
* Verify that $sample push down works when setting the
* 'internalQueryCutoffForSampleFromRandomCursor' cluster parameter.
*
* Requires random cursor support.
* @tags: [requires_replication]
*/
(function() {
'use strict';
load('jstests/libs/analyze_plan.js');  // For aggPlanHasStage.

// Collection size, and a sample size equal to 6% of it.
const numDocs = 1000;
const sampleSize = numDocs * .06;

// One document per integer value of 'a' in [0, numDocs).
const docs = Array.from({length: numDocs}, (unused, i) => ({a: i}));

// Bring up a single-node replica set to run the test against.
const rst = new ReplSetTest({nodes: 1});
rst.startSet();
rst.initiate();

// The test collection lives in the 'admin' database on the primary; the cluster parameter
// commands below are issued against the same database handle.
const dbName = 'admin';
const collName = 'sample_pushdown';
const testDB = rst.getPrimary().getDB(dbName);
const coll = testDB[collName];
assert.commandWorked(coll.insert(docs));

// $sample followed by a $match that every inserted document satisfies (all 'a' values are >= 0).
const pipeline = [{$sample: {size: sampleSize}}, {$match: {a: {$gte: 0}}}];
// Tests that with the default value for the cluster parameter (5%), the constructed plan will not
// use a storage engine random cursor since our sample size is more than 5% of the number of
// documents in our collection.
(function testDefaultClusterParamValue() {
    // Without changing the cluster parameter, the explain output must NOT contain a
    // $sampleFromRandomCursor stage, i.e. $sample is not pushed down to a random cursor.
    const explainOutput = coll.explain().aggregate(pipeline);
    const usesRandomCursor = aggPlanHasStage(explainOutput, "$sampleFromRandomCursor");
    assert(!usesRandomCursor, tojson(explainOutput));

    // Execute the pipeline and check that exactly 'sampleSize' documents come back.
    const sampledDocs = coll.aggregate(pipeline).toArray();
    assert.eq(sampledDocs.length, sampleSize, tojson(sampledDocs));
})();
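// Tests that after setting the cluster parameter value to 7%, the constructed plan will use a
// storage engine random cursor since our sample size is less than 7% of the number of documents in
// our collection. Also checks that a cutoff of 0 is rejected, and that lowering the cutoff to 1%
// stops the plan from using the random cursor.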
(function testNotDefaultClusterParamValue() {
    // Every command in this test targets the same cluster parameter.
    const clusterParameterName = 'internalQueryCutoffForSampleFromRandomCursor';

    // Issues setClusterParameter with the given cutoff and returns the raw command response, so
    // that the caller can assert either success or a specific failure.
    const setSampleCutoff = (sampleCutoff) =>
        testDB.runCommand({setClusterParameter: {[clusterParameterName]: {sampleCutoff}}});

    // Reads the parameter back with getClusterParameter and returns its 'sampleCutoff' field.
    const getSampleCutoff = () => {
        const res =
            assert.commandWorked(testDB.runCommand({getClusterParameter: clusterParameterName}));
        return res["clusterParameters"][0].sampleCutoff;
    };

    // Sets the cutoff and confirms that the server reports the new value.
    const setAndVerifySampleCutoff = (sampleCutoff) => {
        assert.commandWorked(setSampleCutoff(sampleCutoff));
        assert.eq(getSampleCutoff(), sampleCutoff);
    };

    // Try to set the cluster parameter to 0, should fail since the value must be gt 0 and lte 1.
    assert.commandFailedWithCode(setSampleCutoff(0), 51024);

    // With a cutoff of 7% the pipeline uses $sample push down, since the sample size is less than
    // 7% of the number of documents in our collection.
    setAndVerifySampleCutoff(0.07);
    const explainAtSevenPercent = coll.explain().aggregate(pipeline);
    assert(aggPlanHasStage(explainAtSevenPercent, "$sampleFromRandomCursor"),
           tojson(explainAtSevenPercent));

    // With a cutoff of 1% the pipeline does not use $sample push down, since the sample size is
    // more than 1% of the number of documents in our collection.
    setAndVerifySampleCutoff(0.01);
    const explainAtOnePercent = coll.explain().aggregate(pipeline);
    assert(!aggPlanHasStage(explainAtOnePercent, "$sampleFromRandomCursor"),
           tojson(explainAtOnePercent));

    // Run the pipeline and verify that we still get the correct number of docs.
    const randDocs = coll.aggregate(pipeline).toArray();
    assert.eq(randDocs.length, sampleSize, tojson(randDocs));
})();
// Clean up: shut down the replica set that was started for this test.
rst.stopSet();
})();
|