author    Naama Bareket <naama.bareket@mongodb.com>    2023-02-06 15:20:47 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com>    2023-02-06 16:23:01 +0000
commit    39ddb8c2c82900ccd4452928d9fb0c8f22b35e3a (patch)
tree      a9f542eb25af3cf608aebe6624dbd4bdd6d1c693
parent    50922d144694ae20c397126dcd3a520219771070 (diff)
download  mongo-39ddb8c2c82900ccd4452928d9fb0c8f22b35e3a.tar.gz
SERVER-72518 Make 5% random-cursor $sample cutoff configurable
-rw-r--r--  jstests/noPassthrough/sample_pushdown_with_set_cluster_param.js  103
-rw-r--r--  src/mongo/db/pipeline/pipeline_d.cpp                              12
-rw-r--r--  src/mongo/db/query/SConscript                                      1
-rw-r--r--  src/mongo/db/query/query_knobs.idl                                24
-rw-r--r--  src/mongo/db/sorter/SConscript                                     1
5 files changed, 140 insertions(+), 1 deletion(-)
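
In short, the commit replaces the hard-coded 5% cutoff in pipeline_d.cpp with the new 'internalQueryCutoffForSampleFromRandomCursor' cluster parameter (default 0.05, validated to lie in (0, 1]). A minimal mongo shell sketch of how the knob is exercised, mirroring the new test below; the 0.07 value is illustrative:

    // Raise the random-cursor $sample cutoff from the default 5% to 7% (illustrative value).
    const adminDB = db.getSiblingDB('admin');
    assert.commandWorked(adminDB.runCommand({
        setClusterParameter:
            {internalQueryCutoffForSampleFromRandomCursor: {sampleCutoff: 0.07}}
    }));

    // Read the parameter back to confirm the cutoff now in effect.
    const reply = assert.commandWorked(adminDB.runCommand(
        {getClusterParameter: 'internalQueryCutoffForSampleFromRandomCursor'}));
    assert.eq(reply.clusterParameters[0].sampleCutoff, 0.07);

As in the test, the command is issued against the admin database of a replica-set node; the test's requires_replication tag reflects that cluster parameters need replication.
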
diff --git a/jstests/noPassthrough/sample_pushdown_with_set_cluster_param.js b/jstests/noPassthrough/sample_pushdown_with_set_cluster_param.js
new file mode 100644
index 00000000000..5af0867d124
--- /dev/null
+++ b/jstests/noPassthrough/sample_pushdown_with_set_cluster_param.js
@@ -0,0 +1,103 @@
+/**
+ * Verify that $sample push down works as expected when setting the
+ * 'internalQueryCutoffForSampleFromRandomCursor' cluster parameter.
+ *
+ * Requires random cursor support.
+ * @tags: [requires_replication]
+ */
+(function() {
+'use strict';
+
+load('jstests/libs/analyze_plan.js'); // For planHasStage.
+
+const numDocs = 1000;
+const sampleSize = numDocs * .06;
+let docs = [];
+for (let i = 0; i < numDocs; ++i) {
+ docs.push({a: i});
+}
+
+const rst = new ReplSetTest({nodes: 1});
+rst.startSet();
+rst.initiate();
+const collName = 'sample_pushdown';
+const dbName = 'admin';
+const testDB = rst.getPrimary().getDB(dbName);
+const coll = testDB[collName];
+assert.commandWorked(coll.insert(docs));
+const pipeline = [{$sample: {size: sampleSize}}, {$match: {a: {$gte: 0}}}];
+
+// Tests that with the default value for the cluster parameter (5%), the constructed plan will not
+// use a storage engine random cursor since our sample size is more than 5% of the number of
+// documents in our collection.
+(function testDefaultClusterParamValue() {
+ // Verify that our pipeline does not use $sample push down, since the sample size (6%) is more than the 5% cutoff.
+ const explain = coll.explain().aggregate(pipeline);
+ assert(!aggPlanHasStage(explain, "$sampleFromRandomCursor"), tojson(explain));
+
+ // Run the pipeline.
+ const randDocs = testDB[collName].aggregate(pipeline).toArray();
+
+ // Verify that we have the correct number of docs.
+ assert.eq(randDocs.length, sampleSize, tojson(randDocs));
+})();
+
+// Tests that when the cluster parameter value is set to 7%, the constructed plan will use a
+// storage engine random cursor, since our sample size is less than 7% of the number of documents
+// in our collection.
+(function testNotDefaultClusterParamValue() {
+ // Try to set the cluster parameter to 0; this should fail since the value must be greater than 0 and at most 1.
+ const clusterParameterValue0 = {sampleCutoff: 0};
+ const clusterParameterName0 = 'internalQueryCutoffForSampleFromRandomCursor';
+ const clusterParameter0 = {[clusterParameterName0]: clusterParameterValue0};
+ assert.commandFailedWithCode(testDB.runCommand({setClusterParameter: clusterParameter0}),
+ 51024);
+
+ // Set the cluster parameter to have a cutoff of 7%.
+ const clusterParameterValue = {sampleCutoff: 0.07};
+ const clusterParameterName = 'internalQueryCutoffForSampleFromRandomCursor';
+ const clusterParameter = {[clusterParameterName]: clusterParameterValue};
+ assert.commandWorked(testDB.runCommand({setClusterParameter: clusterParameter}));
+
+ // Make sure this cluster parameter holds the correct value.
+ const getClusterVal =
+ assert
+ .commandWorked(testDB.runCommand(
+ {getClusterParameter: clusterParameterName}))["clusterParameters"][0]
+ .sampleCutoff;
+ assert.eq(getClusterVal, 0.07);
+
+ // Verify that our pipeline uses $sample push down, since the sample size is less than 7% of the
+ // number of documents in our collection.
+ const explain = coll.explain().aggregate(pipeline);
+ assert(aggPlanHasStage(explain, "$sampleFromRandomCursor"), tojson(explain));
+
+ // Set the cluster parameter to have a cutoff of 1%.
+ const clusterParameterValue1 = {sampleCutoff: 0.01};
+ const clusterParameterName1 = 'internalQueryCutoffForSampleFromRandomCursor';
+ const clusterParameter1 = {[clusterParameterName1]: clusterParameterValue1};
+ assert.commandWorked(testDB.runCommand({setClusterParameter: clusterParameter1}));
+
+ // Make sure this cluster parameter holds the correct value.
+ const getClusterVal1 =
+ assert
+ .commandWorked(testDB.runCommand(
+ {getClusterParameter: clusterParameterName1}))["clusterParameters"][0]
+ .sampleCutoff;
+ assert.eq(getClusterVal1, 0.01);
+
+ // Verify that our pipeline does not use $sample push down, since the sample size is more than
+ // 1% of the number of documents in our collection.
+ const explain2 = coll.explain().aggregate(pipeline);
+ assert(!aggPlanHasStage(explain2, "$sampleFromRandomCursor"), tojson(explain2));
+
+ // Run the pipeline.
+ const randDocs = testDB[collName].aggregate(pipeline).toArray();
+
+ // Verify that we have the correct number of docs.
+ assert.eq(randDocs.length, sampleSize, tojson(randDocs));
+})();
+
+// Clean up.
+rst.stopSet();
+})();
diff --git a/src/mongo/db/pipeline/pipeline_d.cpp b/src/mongo/db/pipeline/pipeline_d.cpp
index bb146b1cb94..f0d4d224f42 100644
--- a/src/mongo/db/pipeline/pipeline_d.cpp
+++ b/src/mongo/db/pipeline/pipeline_d.cpp
@@ -516,7 +516,17 @@ StatusWith<unique_ptr<PlanExecutor, PlanExecutor::Deleter>> PipelineD::createRan
// function because double-locking forces any PlanExecutor we create to adopt a NO_YIELD policy.
invariant(opCtx->lockState()->isCollectionLockedForMode(coll->ns(), MODE_IS));
- static const double kMaxSampleRatioForRandCursor = 0.05;
+ auto* clusterParameters = ServerParameterSet::getClusterParameterSet();
+ auto* randomCursorSampleRatioParam =
+ clusterParameters
+ ->get<ClusterParameterWithStorage<InternalQueryCutoffForSampleFromRandomCursorStorage>>(
+ "internalQueryCutoffForSampleFromRandomCursor");
+
+ auto maxSampleRatioClusterParameter =
+ randomCursorSampleRatioParam->getValue(expCtx->ns.tenantId());
+
+ const double kMaxSampleRatioForRandCursor = maxSampleRatioClusterParameter.getSampleCutoff();
+
if (!expCtx->ns.isTimeseriesBucketsCollection()) {
if (sampleSize > numRecords * kMaxSampleRatioForRandCursor || numRecords <= 100) {
return nullptr;
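
For reference, the check that consumes the cutoff (visible in the context lines above) only admits the random-cursor plan when the requested sample is small enough relative to the collection and the collection itself is not tiny. A hedged JavaScript sketch of that decision; the helper name is hypothetical:

    // Hypothetical helper mirroring the cutoff check above (non-time-series branch).
    function qualifiesForRandomCursorSample(sampleSize, numRecords, sampleCutoff) {
        // Too large a sample relative to the cutoff, or too small a collection:
        // $sample is not pushed down to a storage engine random cursor.
        if (sampleSize > numRecords * sampleCutoff || numRecords <= 100) {
            return false;
        }
        return true;
    }

    // With the default cutoff (0.05), a 60-document sample over 1000 documents (6%) does not
    // qualify; raising the cutoff to 0.07, as the new test does, makes it qualify.
    assert(!qualifiesForRandomCursorSample(60, 1000, 0.05));
    assert(qualifiesForRandomCursorSample(60, 1000, 0.07));
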
diff --git a/src/mongo/db/query/SConscript b/src/mongo/db/query/SConscript
index bb1b4efda4c..a2c337ce8bd 100644
--- a/src/mongo/db/query/SConscript
+++ b/src/mongo/db/query/SConscript
@@ -267,6 +267,7 @@ env.Library(
LIBDEPS_PRIVATE=[
'$BUILD_DIR/mongo/db/server_base',
'$BUILD_DIR/mongo/db/service_context',
+ '$BUILD_DIR/mongo/idl/cluster_server_parameter',
'$BUILD_DIR/mongo/util/pcre_wrapper',
'memory_util',
],
diff --git a/src/mongo/db/query/query_knobs.idl b/src/mongo/db/query/query_knobs.idl
index 64d8b782359..d4ca334491f 100644
--- a/src/mongo/db/query/query_knobs.idl
+++ b/src/mongo/db/query/query_knobs.idl
@@ -26,6 +26,9 @@
# it in the license file.
#
+imports:
+ - "mongo/idl/cluster_server_parameter.idl"
+
global:
cpp_namespace: "mongo"
cpp_includes:
@@ -61,6 +64,21 @@ enums:
# Use a prefix of sha256 redaction strategy
kSha256RedactionStrategy: "sha256"
+structs:
+ InternalQueryCutoffForSampleFromRandomCursorStorage:
+ description: "A specification for the 'internalQueryCutoffForSampleFromRandomCursor' cluster-wide configuration parameter type."
+ inline_chained_structs: true
+ chained_structs:
+ ClusterServerParameter: clusterServerParameter
+ fields:
+ sampleCutoff:
+ description: "The configuration for random-cursor sample cutoffs."
+ type: double
+ default: 0.05
+ validator:
+ gt: 0.0
+ lte: 1.0
+
server_parameters:
#
@@ -1076,6 +1094,12 @@ server_parameters:
gte: 0
on_update: plan_cache_util::clearSbeCacheOnParameterChange
+ internalQueryCutoffForSampleFromRandomCursor:
+ description: "The maximum sample ratio at which $sample can use a storage engine random cursor."
+ set_at: cluster
+ cpp_varname: "internalQueryCutoffForSampleFromRandomCursor"
+ cpp_vartype: InternalQueryCutoffForSampleFromRandomCursorStorage
+
internalQueryGlobalProfilingFilter:
description: "Enables the setProfilingFilterGlobally command."
set_at: [ startup ]
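
The validator on sampleCutoff above enforces 0 < sampleCutoff <= 1, so out-of-range values are rejected when the parameter is set; the new test expects error code 51024 for a cutoff of 0:

    // Setting the cutoff to 0 fails the gt: 0.0 validator (code 51024 in the test).
    assert.commandFailedWithCode(
        db.getSiblingDB('admin').runCommand({
            setClusterParameter: {internalQueryCutoffForSampleFromRandomCursor: {sampleCutoff: 0}}
        }),
        51024);
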
diff --git a/src/mongo/db/sorter/SConscript b/src/mongo/db/sorter/SConscript
index a52da1d0c7a..5f88dabdc58 100644
--- a/src/mongo/db/sorter/SConscript
+++ b/src/mongo/db/sorter/SConscript
@@ -13,6 +13,7 @@ sorterEnv.CppUnitTest(
],
LIBDEPS=[
'$BUILD_DIR/mongo/db/exec/document_value/document_value',
+ '$BUILD_DIR/mongo/db/query/query_knobs',
'$BUILD_DIR/mongo/db/service_context',
'$BUILD_DIR/mongo/db/storage/encryption_hooks',
'$BUILD_DIR/mongo/db/storage/storage_options',