diff options
author | Nick Zolnierz <nicholas.zolnierz@mongodb.com> | 2021-03-31 14:35:10 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-04-30 14:22:03 +0000 |
commit | f36132ec219dd8cfdab420403f10d18666249947 (patch) | |
tree | b670698b942e7dbf84e01ae9e1afd061680ec457 /jstests | |
parent | 3295126e8a081ca57af1a07c6c582fac7a825efd (diff) | |
download | mongo-f36132ec219dd8cfdab420403f10d18666249947.tar.gz |
SERVER-54664 Add per-function execution stats to $setWindowFields stage
Diffstat (limited to 'jstests')
3 files changed, 156 insertions, 3 deletions
diff --git a/jstests/aggregation/sources/setWindowFields/explain.js b/jstests/aggregation/sources/setWindowFields/explain.js new file mode 100644 index 00000000000..fbae93889d8 --- /dev/null +++ b/jstests/aggregation/sources/setWindowFields/explain.js @@ -0,0 +1,149 @@ +/** + * Tests that $setWindowFields stage reports memory footprint per function when explain is run + * with verbosities "executionStats" and "allPlansExecution". + * + * @tags: [assumes_against_mongod_not_mongos] + */ +(function() { +"use strict"; + +load("jstests/libs/analyze_plan.js"); // For getAggPlanStages(). + +const coll = db[jsTestName()]; +coll.drop(); +const bigStr = Array(1025).toString(); // 1KB of ',' +const nDocs = 1000; +const nPartitions = 50; +const docSize = 8 + 8 + 1024; + +const featureEnabled = + assert.commandWorked(db.adminCommand({getParameter: 1, featureFlagWindowFunctions: 1})) + .featureFlagWindowFunctions.value; +if (!featureEnabled) { + jsTestLog("Skipping test because the window function feature flag is disabled"); + return; +} + +let bulk = coll.initializeUnorderedBulkOp(); +for (let i = 1; i <= nDocs; i++) { + bulk.insert({_id: i, key: i % nPartitions, bigStr: bigStr}); +} +assert.commandWorked(bulk.execute()); + +/** + * Checks that the execution stats in the explain output for a $setWindowFields stage are as + * expected. + * - 'pipeline' is the aggregation pipeline whose explain output is checked. + * - 'expectedFunctionMemUsages' is used to check the memory footprint stats for each function. + * - 'verbosity' indicates the explain verbosity used. 
+ */ +function checkExplainResult(pipeline, expectedFunctionMemUsages, verbosity) { + const stages = + getAggPlanStages(coll.explain(verbosity).aggregate(pipeline), "$_internalSetWindowFields"); + for (let stage of stages) { + assert(stage.hasOwnProperty("$_internalSetWindowFields"), stage); + + if (verbosity === "executionStats" || verbosity === "allPlansExecution") { + assert(stage.hasOwnProperty("maxFunctionMemoryUsageBytes"), stage); + const maxFunctionMemUsages = stage["maxFunctionMemoryUsageBytes"]; + for (let field of Object.keys(maxFunctionMemUsages)) { + // Ensures that the expected functions are all included and the corresponding + // memory usage is in a reasonable range. + if (expectedFunctionMemUsages.hasOwnProperty(field)) { + assert.gt(maxFunctionMemUsages[field], + expectedFunctionMemUsages[field], + "mismatch for function '" + field + "': " + tojson(stage)); + assert.lt(maxFunctionMemUsages[field], + 2 * expectedFunctionMemUsages[field], + "mismatch for function '" + field + "': " + tojson(stage)); + } + } + } else { + assert(!stage.hasOwnProperty("maxFunctionMemoryUsageBytes"), stage); + } + } +} + +(function testQueryPlannerVerbosity() { + const pipeline = [ + { + $setWindowFields: + {output: {count: {$sum: 1}, push: {$push: "$bigStr"}, set: {$addToSet: "$bigStr"}}} + }, + ]; + const stages = getAggPlanStages(coll.explain("queryPlanner").aggregate(pipeline), + "$_internalSetWindowFields"); + checkExplainResult(stages, {}, "queryPlanner"); +})(); + +(function testUnboundedMemUsage() { + let pipeline = [ + { + $setWindowFields: + {output: {count: {$sum: 1}, push: {$push: "$bigStr"}, set: {$addToSet: "$bigStr"}}} + }, + ]; + + // The $setWindowFields stage "streams" one partition at a time, so there's only one instance of + // each function. For the default [unbounded, unbounded] window type, each function uses memory + // usage comparable to its $group counterpart. 
+ let expectedFunctionMemUsages = { + count: 60, + push: nDocs * 1024, + set: 1024, + }; + + checkExplainResult(pipeline, expectedFunctionMemUsages, "executionStats"); + checkExplainResult(pipeline, expectedFunctionMemUsages, "allPlansExecution"); + + // Test that the memory footprint is reduced with partitioning. + pipeline = [ + { + $setWindowFields: { + partitionBy: "$key", + output: {count: {$sum: 1}, push: {$push: "$bigStr"}, set: {$addToSet: "$bigStr"}} + } + }, + ]; + expectedFunctionMemUsages = { + count: 60, + push: (nDocs / nPartitions) * 1024, + set: 1024, + }; + + checkExplainResult(pipeline, expectedFunctionMemUsages, "executionStats"); + checkExplainResult(pipeline, expectedFunctionMemUsages, "allPlansExecution"); +})(); + +(function testSlidingWindowMemUsage() { + const windowSize = 10; + let pipeline = [ + { + $setWindowFields: { + sortBy: {_id: 1}, + output: {runningSum: {$sum: "$_id", window: {documents: [-5, 4]}}} + } + }, + ]; + const expectedFunctionMemUsages = { + runningSum: windowSize * 16 + + 160, // 10x64-bit integer values per window, and 160 for the $sum state. + }; + + checkExplainResult(pipeline, expectedFunctionMemUsages, "executionStats"); + checkExplainResult(pipeline, expectedFunctionMemUsages, "allPlansExecution"); + + // Adding partitioning doesn't change the peak memory usage. 
+ pipeline = [ + { + $setWindowFields: { + partitionBy: "$key", + sortBy: {_id: 1}, + output: {runningSum: {$sum: "$_id", window: {documents: [-5, 4]}}} + } + }, + ]; + checkExplainResult(pipeline, expectedFunctionMemUsages, "executionStats"); + checkExplainResult(pipeline, expectedFunctionMemUsages, "allPlansExecution"); +})(); +}()); diff --git a/jstests/aggregation/sources/setWindowFields/memory_limit.js b/jstests/aggregation/sources/setWindowFields/memory_limit.js index 6e705beaa4b..f960780d06b 100644 --- a/jstests/aggregation/sources/setWindowFields/memory_limit.js +++ b/jstests/aggregation/sources/setWindowFields/memory_limit.js @@ -58,6 +58,7 @@ assert.commandWorked(coll.runCommand({ }], cursor: {} })); + // Test that the query fails with a window function that stores documents. assert.commandFailedWithCode(coll.runCommand({ aggregate: coll.getName(), diff --git a/jstests/noPassthrough/explain_group_stage_exec_stats.js b/jstests/noPassthrough/explain_group_stage_exec_stats.js index 55be2c77dc2..257d9676baa 100644 --- a/jstests/noPassthrough/explain_group_stage_exec_stats.js +++ b/jstests/noPassthrough/explain_group_stage_exec_stats.js @@ -45,7 +45,7 @@ const expectedAccumMemUsages = { function checkGroupStages(stages, expectedAccumMemUsages, isExecExplain, shouldSpillToDisk) { // Tracks the memory usage per accumulator in total as 'stages' passed in could be the explain // output across a cluster. 
- let totalAccmMemoryUsageBytes = 0; + let totalAccumMemoryUsageBytes = 0; for (let stage of stages) { assert(stage.hasOwnProperty("$group"), stage); @@ -54,7 +54,7 @@ function checkGroupStages(stages, expectedAccumMemUsages, isExecExplain, shouldS assert(stage.hasOwnProperty("maxAccumulatorMemoryUsageBytes"), stage); const maxAccmMemUsages = stage["maxAccumulatorMemoryUsageBytes"]; for (let field of Object.keys(maxAccmMemUsages)) { - totalAccmMemoryUsageBytes += maxAccmMemUsages[field]; + totalAccumMemoryUsageBytes += maxAccmMemUsages[field]; // Ensures that the expected accumulators are all included and the corresponding // memory usage is in a reasonable range. Note that in debug mode, data will be @@ -69,8 +69,11 @@ function checkGroupStages(stages, expectedAccumMemUsages, isExecExplain, shouldS } } + // Add some wiggle room to the total memory used compared to the limit parameter since the check + // for spilling to disk happens after each document is processed. if (shouldSpillToDisk) - assert.gt(maxMemoryLimitForGroupStage, totalAccmMemoryUsageBytes); + assert.gt( + maxMemoryLimitForGroupStage + 3 * 1024, totalAccumMemoryUsageBytes, tojson(stages)); } let groupStages = getAggPlanStages(coll.explain("executionStats").aggregate(pipeline), "$group"); |