/**
 * Test a $group query which has a large number of group-by fields and needs to spill to disk.
 */
(function() {
"use strict";

load("jstests/libs/analyze_plan.js");

const MEM_LIMIT_KB = 2;

// Make sure that we can handle more than 32 keys (the maximum allowed number of components in a
// compound index).
const NUM_GROUP_KEYS = 33;

// Run a mongod that has a reduced memory limit for when its hash aggregation operators (in both
// SBE and the Classic execution engine) will spill data to disk.
const memLimit = MEM_LIMIT_KB * 1024;
const conn = MongoRunner.runMongod({
    setParameter: {
        internalQuerySlotBasedExecutionHashAggApproxMemoryUseInBytesBeforeSpill: memLimit,
        internalDocumentSourceGroupMaxMemoryBytes: memLimit
    }
});
assert.neq(conn, null, "mongod failed to start up");

const db = conn.getDB("test");
const coll = db.group_spill_long_keys;

// Returns the next field name in the sequence "a", "b", ..., "z", "aa", "ab", and so on.
function nextFieldName(name) {
    function nextChar(char) {
        return String.fromCharCode(char.charCodeAt(0) + 1);
    }

    function lastChar(str) {
        return str[str.length - 1];
    }

    // If the final character is a "z", start using a longer string. Otherwise we cycle through all
    // possibilities for the last letter. This means we generate only 26 unique names for each
    // string length, but that's ok since this function will not be used to generate more than ~40
    // unique names.
    if (lastChar(name) === "z") {
        return "a".repeat(name.length + 1);
    } else {
        return name.substr(0, name.length - 1) + nextChar(lastChar(name));
    }
}

let counter = 0;

/**
 * Generates a document with 'NUM_GROUP_KEYS' uniquely named keys. Values are increasingly large
 * 64-bit integers.
 */
function generateDoc() {
    let doc = {};
    let str = "a";
    for (let i = 0; i < NUM_GROUP_KEYS; ++i) {
        doc[str] = NumberLong(counter);
        ++counter;
        str = nextFieldName(str);
    }
    return doc;
}

// Calculate how many documents we need. We insert 100 times the approximate number of documents
// needed to reach the spill threshold so that the query spills frequently.
let exampleDoc = generateDoc();
let docSize = Object.bsonsize(exampleDoc);
let docsNeeded = Math.ceil(memLimit / docSize) * 100;

coll.drop();
for (let i = 0; i < docsNeeded; ++i) {
    assert.commandWorked(coll.insert(generateDoc()));
}

/**
 * Generates the _id field for a $group query that aggregates on 'NUM_GROUP_KEYS' unique keys. The
 * returned document should look like {a: "$a", b: "$b", ...}.
 */
const groupKey = (function() {
    let doc = {};
    let str = "a";
    for (let i = 0; i < NUM_GROUP_KEYS; ++i) {
        doc[str] = "$" + str;
        str = nextFieldName(str);
    }
    return doc;
}());

const pipeline = [{$group: {_id: groupKey}}];

// Run the query twice and assert that there are as many groups as documents in the collection,
// since each document has a unique group key. We run the query twice because the second time it
// may use a cached plan.
for (let i = 0; i < 2; ++i) {
    assert.eq(docsNeeded, coll.aggregate(pipeline).itcount());
}

// Run an explain. If SBE was used, make sure that we see a "group" stage that spilled in the exec
// stats.
let explain = coll.explain("executionStats").aggregate(pipeline);
assert(explain.hasOwnProperty("explainVersion"), explain);
if (explain.explainVersion !== "1") {
    let hashAgg = getPlanStage(explain.executionStats.executionStages, "group");
    // There should be a group-by slot for each field we are grouping by.
    assert.eq(hashAgg.groupBySlots.length, NUM_GROUP_KEYS, hashAgg);
    assert.eq(hashAgg.usedDisk, true, hashAgg);
    assert.gt(hashAgg.spills, 0, hashAgg);
    assert.gt(hashAgg.spilledRecords, 0, hashAgg);
}

MongoRunner.stopMongod(conn);
}());