diff options
author | Nick Zolnierz <nicholas.zolnierz@mongodb.com> | 2021-03-31 14:35:10 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-04-30 14:22:03 +0000 |
commit | f36132ec219dd8cfdab420403f10d18666249947 (patch) | |
tree | b670698b942e7dbf84e01ae9e1afd061680ec457 /jstests | |
parent | 3295126e8a081ca57af1a07c6c582fac7a825efd (diff) | |
download | mongo-f36132ec219dd8cfdab420403f10d18666249947.tar.gz |
SERVER-54664 Add per-function execution stats to $setWindowFields stage
Diffstat (limited to 'jstests')
3 files changed, 156 insertions, 3 deletions
diff --git a/jstests/aggregation/sources/setWindowFields/explain.js b/jstests/aggregation/sources/setWindowFields/explain.js new file mode 100644 index 00000000000..fbae93889d8 --- /dev/null +++ b/jstests/aggregation/sources/setWindowFields/explain.js @@ -0,0 +1,149 @@ +/** + * Tests that $setWindowFields stage reports memory footprint per function when explain is run + * with verbosities "executionStats" and "allPlansExecution". + * + * @tags: [assumes_against_mongod_not_mongos] + */ +(function() { +"use strict"; + +load("jstests/libs/analyze_plan.js"); // For getAggPlanStages(). + +const coll = db[jsTestName()]; +coll.drop(); +const bigStr = Array(1025).toString(); // 1KB of ',' +const nDocs = 1000; +const nPartitions = 50; +const docSize = 8 + 8 + 1024; + +const featureEnabled = + assert.commandWorked(db.adminCommand({getParameter: 1, featureFlagWindowFunctions: 1})) + .featureFlagWindowFunctions.value; +if (!featureEnabled) { + jsTestLog("Skipping test because the window function feature flag is disabled"); + return; +} + +let bulk = coll.initializeUnorderedBulkOp(); +for (let i = 1; i <= nDocs; i++) { + bulk.insert({_id: i, key: i % nPartitions, bigStr: bigStr}); +} +assert.commandWorked(bulk.execute()); + +/** + * Checks that the execution stats in the explain output for a $setWindowFields stage are as + * expected. + * - 'pipeline' is the aggregation pipeline whose explain output is checked. + * - 'expectedFunctionMemUsages' is used to check the memory footprint stats for each function. + * - 'verbosity' indicates the explain verbosity used. 
+ */ +function checkExplainResult(pipeline, expectedFunctionMemUsages, verbosity) { + const stages = + getAggPlanStages(coll.explain(verbosity).aggregate(pipeline), "$_internalSetWindowFields"); + for (let stage of stages) { + assert(stage.hasOwnProperty("$_internalSetWindowFields"), stage); + + if (verbosity === "executionStats" || verbosity === "allPlansExecution") { + assert(stage.hasOwnProperty("maxFunctionMemoryUsageBytes"), stage); + const maxFunctionMemUsages = stage["maxFunctionMemoryUsageBytes"]; + for (let field of Object.keys(maxFunctionMemUsages)) { + // Ensures that the expected functions are all included and the corresponding + // memory usage is in a reasonable range. + if (expectedFunctionMemUsages.hasOwnProperty(field)) { + assert.gt(maxFunctionMemUsages[field], + expectedFunctionMemUsages[field], + "mismatch for function '" + field + "': " + tojson(stage)); + assert.lt(maxFunctionMemUsages[field], + 2 * expectedFunctionMemUsages[field], + "mismatch for function '" + field + "': " + tojson(stage)); + } + } + } else { + assert(!stage.hasOwnProperty("maxFunctionMemoryUsageBytes"), stage); + } + } +} + +(function testQueryPlannerVerbosity() { + const pipeline = [ + { + $setWindowFields: + {output: {count: {$sum: 1}, push: {$push: "$bigStr"}, set: {$addToSet: "$bigStr"}}} + }, + ]; + const stages = getAggPlanStages(coll.explain("queryPlanner").aggregate(pipeline), + "$_internalSetWindowFields"); + checkExplainResult(stages, {}, "queryPlanner"); +})(); + +(function testUnboundedMemUsage() { + let pipeline = [ + { + $setWindowFields: + {output: {count: {$sum: 1}, push: {$push: "$bigStr"}, set: {$addToSet: "$bigStr"}}} + }, + ]; + + // The $setWindowFields stage "streams" one partition at a time, so there's only one instance of + // each function. For the default [unbounded, unbounded] window type, each function uses memory + // usage comparable to its $group counterpart. 
+ let expectedFunctionMemUsages = { + count: 60, + push: nDocs * 1024, + set: 1024, + }; + + checkExplainResult(pipeline, expectedFunctionMemUsages, "executionStats"); + checkExplainResult(pipeline, expectedFunctionMemUsages, "allPlansExecution"); + + // Test that the memory footprint is reduced with partitioning. + pipeline = [ + { + $setWindowFields: { + partitionBy: "$key", + output: {count: {$sum: 1}, push: {$push: "$bigStr"}, set: {$addToSet: "$bigStr"}} + } + }, + ]; + expectedFunctionMemUsages = { + count: 60, + push: (nDocs / nPartitions) * 1024, + set: 1024, + }; + + checkExplainResult(pipeline, expectedFunctionMemUsages, "executionStats"); + checkExplainResult(pipeline, expectedFunctionMemUsages, "allPlansExecution"); +})(); + +(function testSlidingWindowMemUsage() { + const windowSize = 10; + let pipeline = [ + { + $setWindowFields: { + sortBy: {_id: 1}, + output: {runningSum: {$sum: "$_id", window: {documents: [-5, 4]}}} + } + }, + ]; + const expectedFunctionMemUsages = { + runningSum: windowSize * 16 + + 160, // 10x64-bit integer values per window, and 160 for the $sum state. + }; + + checkExplainResult(pipeline, expectedFunctionMemUsages, "executionStats"); + checkExplainResult(pipeline, expectedFunctionMemUsages, "allPlansExecution"); + + // Adding partitioning doesn't change the peak memory usage. 
+ pipeline = [ + { + $setWindowFields: { + partitionBy: "$key", + sortBy: {_id: 1}, + output: {runningSum: {$sum: "$_id", window: {documents: [-5, 4]}}} + } + }, + ]; + checkExplainResult(pipeline, expectedFunctionMemUsages, "executionStats"); + checkExplainResult(pipeline, expectedFunctionMemUsages, "allPlansExecution"); +})(); +}()); diff --git a/jstests/aggregation/sources/setWindowFields/memory_limit.js b/jstests/aggregation/sources/setWindowFields/memory_limit.js index 6e705beaa4b..f960780d06b 100644 --- a/jstests/aggregation/sources/setWindowFields/memory_limit.js +++ b/jstests/aggregation/sources/setWindowFields/memory_limit.js @@ -58,6 +58,7 @@ assert.commandWorked(coll.runCommand({ }], cursor: {} })); + // Test that the query fails with a window function that stores documents. assert.commandFailedWithCode(coll.runCommand({ aggregate: coll.getName(), diff --git a/jstests/noPassthrough/explain_group_stage_exec_stats.js b/jstests/noPassthrough/explain_group_stage_exec_stats.js index 55be2c77dc2..257d9676baa 100644 --- a/jstests/noPassthrough/explain_group_stage_exec_stats.js +++ b/jstests/noPassthrough/explain_group_stage_exec_stats.js @@ -45,7 +45,7 @@ const expectedAccumMemUsages = { function checkGroupStages(stages, expectedAccumMemUsages, isExecExplain, shouldSpillToDisk) { // Tracks the memory usage per accumulator in total as 'stages' passed in could be the explain // output across a cluster. 
- let totalAccmMemoryUsageBytes = 0; + let totalAccumMemoryUsageBytes = 0; for (let stage of stages) { assert(stage.hasOwnProperty("$group"), stage); @@ -54,7 +54,7 @@ function checkGroupStages(stages, expectedAccumMemUsages, isExecExplain, shouldS assert(stage.hasOwnProperty("maxAccumulatorMemoryUsageBytes"), stage); const maxAccmMemUsages = stage["maxAccumulatorMemoryUsageBytes"]; for (let field of Object.keys(maxAccmMemUsages)) { - totalAccmMemoryUsageBytes += maxAccmMemUsages[field]; + totalAccumMemoryUsageBytes += maxAccmMemUsages[field]; // Ensures that the expected accumulators are all included and the corresponding // memory usage is in a reasonable range. Note that in debug mode, data will be @@ -69,8 +69,11 @@ function checkGroupStages(stages, expectedAccumMemUsages, isExecExplain, shouldS } } + // Add some wiggle room to the total memory used compared to the limit parameter since the check + // for spilling to disk happens after each document is processed. if (shouldSpillToDisk) - assert.gt(maxMemoryLimitForGroupStage, totalAccmMemoryUsageBytes); + assert.gt( + maxMemoryLimitForGroupStage + 3 * 1024, totalAccumMemoryUsageBytes, tojson(stages)); } let groupStages = getAggPlanStages(coll.explain("executionStats").aggregate(pipeline), "$group"); |