diff options
author | Naama Bareket <naama.bareket@mongodb.com> | 2023-02-06 15:20:47 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-02-06 16:23:01 +0000 |
commit | 39ddb8c2c82900ccd4452928d9fb0c8f22b35e3a (patch) | |
tree | a9f542eb25af3cf608aebe6624dbd4bdd6d1c693 | |
parent | 50922d144694ae20c397126dcd3a520219771070 (diff) | |
download | mongo-39ddb8c2c82900ccd4452928d9fb0c8f22b35e3a.tar.gz |
SERVER-72518 Make 5% random-cursor $sample cutoff configurable
-rw-r--r-- | jstests/noPassthrough/sample_pushdown_with_set_cluster_param.js | 103 | ||||
-rw-r--r-- | src/mongo/db/pipeline/pipeline_d.cpp | 12 | ||||
-rw-r--r-- | src/mongo/db/query/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/query/query_knobs.idl | 24 | ||||
-rw-r--r-- | src/mongo/db/sorter/SConscript | 1 |
5 files changed, 140 insertions, 1 deletions
diff --git a/jstests/noPassthrough/sample_pushdown_with_set_cluster_param.js b/jstests/noPassthrough/sample_pushdown_with_set_cluster_param.js new file mode 100644 index 00000000000..5af0867d124 --- /dev/null +++ b/jstests/noPassthrough/sample_pushdown_with_set_cluster_param.js @@ -0,0 +1,103 @@ +/** + * Verify that $sample push down works when setting 'internalQueryCutoffForSampleFromRandomCursor' + * cluster parameter. + * + * Requires random cursor support. + * @tags: [requires_replication] + */ +(function() { +'use strict'; + +load('jstests/libs/analyze_plan.js'); // For aggPlanHasStage. + +const numDocs = 1000; +const sampleSize = numDocs * .06; +let docs = []; +for (let i = 0; i < numDocs; ++i) { + docs.push({a: i}); +} + +const rst = new ReplSetTest({nodes: 1}); +rst.startSet(); +rst.initiate(); +const collName = 'sample_pushdown'; +const dbName = 'admin'; +const testDB = rst.getPrimary().getDB(dbName); +const coll = testDB[collName]; +assert.commandWorked(coll.insert(docs)); +const pipeline = [{$sample: {size: sampleSize}}, {$match: {a: {$gte: 0}}}]; + +// Tests that with the default value for the cluster parameter (5%), the constructed plan will not +// use a storage engine random cursor since our sample size is more than 5% of the number of +// documents in our collection. +(function testDefaultClusterParamValue() { + // Verify that our pipeline does not use $sample push down. + const explain = coll.explain().aggregate(pipeline); + assert(!aggPlanHasStage(explain, "$sampleFromRandomCursor"), tojson(explain)); + + // Run the pipeline. + const randDocs = testDB[collName].aggregate(pipeline).toArray(); + + // Verify that we have the correct number of docs. + assert.eq(randDocs.length, sampleSize, tojson(randDocs)); +})(); + +// Tests that after setting the cluster parameter value to 7%, the constructed plan will use a +// storage engine random cursor since our sample size is less than 7% of the number of documents in +// our collection. 
+(function testNotDefaultClusterParamValue() { + // Try to set the cluster parameter to 0, should fail since the value must be gt 0 and lte 1. + const clusterParameterValue0 = {sampleCutoff: 0}; + const clusterParameterName0 = 'internalQueryCutoffForSampleFromRandomCursor'; + const clusterParameter0 = {[clusterParameterName0]: clusterParameterValue0}; + assert.commandFailedWithCode(testDB.runCommand({setClusterParameter: clusterParameter0}), + 51024); + + // Set the cluster parameter to have a cutoff of 7%. + const clusterParameterValue = {sampleCutoff: 0.07}; + const clusterParameterName = 'internalQueryCutoffForSampleFromRandomCursor'; + const clusterParameter = {[clusterParameterName]: clusterParameterValue}; + assert.commandWorked(testDB.runCommand({setClusterParameter: clusterParameter})); + + // Make sure this cluster parameter holds the correct value. + const getClusterVal = + assert + .commandWorked(testDB.runCommand( + {getClusterParameter: clusterParameterName}))["clusterParameters"][0] + .sampleCutoff; + assert.eq(getClusterVal, 0.07); + + // Verify that our pipeline uses $sample push down, since the sample size is less than 7% of the + // number of documents in our collection. + const explain = coll.explain().aggregate(pipeline); + assert(aggPlanHasStage(explain, "$sampleFromRandomCursor"), tojson(explain)); + + // Set the cluster parameter to have a cutoff of 1%. + const clusterParameterValue1 = {sampleCutoff: 0.01}; + const clusterParameterName1 = 'internalQueryCutoffForSampleFromRandomCursor'; + const clusterParameter1 = {[clusterParameterName1]: clusterParameterValue1}; + assert.commandWorked(testDB.runCommand({setClusterParameter: clusterParameter1})); + + // Make sure this cluster parameter holds the correct value. 
+ const getClusterVal1 = + assert + .commandWorked(testDB.runCommand( + {getClusterParameter: clusterParameterName1}))["clusterParameters"][0] + .sampleCutoff; + assert.eq(getClusterVal1, 0.01); + + // Verify that our pipeline does not use $sample push down, since the sample size is more than + // 1% of the number of documents in our collection. + const explain2 = coll.explain().aggregate(pipeline); + assert(!aggPlanHasStage(explain2, "$sampleFromRandomCursor"), tojson(explain2)); + + // Run the pipeline. + const randDocs = testDB[collName].aggregate(pipeline).toArray(); + + // Verify that we have the correct number of docs. + assert.eq(randDocs.length, sampleSize, tojson(randDocs)); +})(); + +// Clean up. +rst.stopSet(); +})(); diff --git a/src/mongo/db/pipeline/pipeline_d.cpp b/src/mongo/db/pipeline/pipeline_d.cpp index bb146b1cb94..f0d4d224f42 100644 --- a/src/mongo/db/pipeline/pipeline_d.cpp +++ b/src/mongo/db/pipeline/pipeline_d.cpp @@ -516,7 +516,17 @@ StatusWith<unique_ptr<PlanExecutor, PlanExecutor::Deleter>> PipelineD::createRan // function because double-locking forces any PlanExecutor we create to adopt a NO_YIELD policy. 
invariant(opCtx->lockState()->isCollectionLockedForMode(coll->ns(), MODE_IS)); - static const double kMaxSampleRatioForRandCursor = 0.05; + auto* clusterParameters = ServerParameterSet::getClusterParameterSet(); + auto* randomCursorSampleRatioParam = + clusterParameters + ->get<ClusterParameterWithStorage<InternalQueryCutoffForSampleFromRandomCursorStorage>>( + "internalQueryCutoffForSampleFromRandomCursor"); + + auto maxSampleRatioClusterParameter = + randomCursorSampleRatioParam->getValue(expCtx->ns.tenantId()); + + const double kMaxSampleRatioForRandCursor = maxSampleRatioClusterParameter.getSampleCutoff(); + if (!expCtx->ns.isTimeseriesBucketsCollection()) { if (sampleSize > numRecords * kMaxSampleRatioForRandCursor || numRecords <= 100) { return nullptr; diff --git a/src/mongo/db/query/SConscript b/src/mongo/db/query/SConscript index bb1b4efda4c..a2c337ce8bd 100644 --- a/src/mongo/db/query/SConscript +++ b/src/mongo/db/query/SConscript @@ -267,6 +267,7 @@ env.Library( LIBDEPS_PRIVATE=[ '$BUILD_DIR/mongo/db/server_base', '$BUILD_DIR/mongo/db/service_context', + '$BUILD_DIR/mongo/idl/cluster_server_parameter', '$BUILD_DIR/mongo/util/pcre_wrapper', 'memory_util', ], diff --git a/src/mongo/db/query/query_knobs.idl b/src/mongo/db/query/query_knobs.idl index 64d8b782359..d4ca334491f 100644 --- a/src/mongo/db/query/query_knobs.idl +++ b/src/mongo/db/query/query_knobs.idl @@ -26,6 +26,9 @@ # it in the license file. # +imports: + - "mongo/idl/cluster_server_parameter.idl" + global: cpp_namespace: "mongo" cpp_includes: @@ -61,6 +64,21 @@ enums: # Use a prefix of sha256 redaction strategy kSha256RedactionStrategy: "sha256" +structs: + InternalQueryCutoffForSampleFromRandomCursorStorage: + description: "A specification for the 'internalQueryCutoffForSampleFromRandomCursor' cluster-wide configuration parameter type." 
+ inline_chained_structs: true + chained_structs: + ClusterServerParameter: clusterServerParameter + fields: + sampleCutoff: + description: "The configuration for random-cursor sample cutoffs." + type: double + default: 0.05 + validator: + gt: 0.0 + lte: 1.0 + server_parameters: # @@ -1076,6 +1094,12 @@ server_parameters: gte: 0 on_update: plan_cache_util::clearSbeCacheOnParameterChange + internalQueryCutoffForSampleFromRandomCursor: + description: "The maximum sample ratio for random cursor." + set_at: cluster + cpp_varname: "internalQueryCutoffForSampleFromRandomCursor" + cpp_vartype: InternalQueryCutoffForSampleFromRandomCursorStorage + internalQueryGlobalProfilingFilter: description: "Enables the setProfilingFilterGlobally command." set_at: [ startup ] diff --git a/src/mongo/db/sorter/SConscript b/src/mongo/db/sorter/SConscript index a52da1d0c7a..5f88dabdc58 100644 --- a/src/mongo/db/sorter/SConscript +++ b/src/mongo/db/sorter/SConscript @@ -13,6 +13,7 @@ sorterEnv.CppUnitTest( ], LIBDEPS=[ '$BUILD_DIR/mongo/db/exec/document_value/document_value', + '$BUILD_DIR/mongo/db/query/query_knobs', '$BUILD_DIR/mongo/db/service_context', '$BUILD_DIR/mongo/db/storage/encryption_hooks', '$BUILD_DIR/mongo/db/storage/storage_options', |