// Tests that an aggregation pipeline can be optimized away and the query can be answered using // just the query layer if the pipeline has only one $cursor source, or if the pipeline can be // collapsed into a single $cursor source pipeline. The resulting cursor in this case will look // like what the client would have gotten from find command. // // Relies on the pipeline stages to be collapsed into a single $cursor stage, so pipelines cannot // be wrapped into a facet stage to not prevent this optimization. // TODO SERVER-40323: Plan analyzer helper functions cannot correctly handle explain output for // sharded collections. // @tags: [do_not_wrap_aggregations_in_facets, assumes_unsharded_collection] (function() { "use strict"; load("jstests/aggregation/extras/utils.js"); // For 'orderedArrayEq' and 'arrayEq'. load("jstests/concurrency/fsm_workload_helpers/server_types.js"); // For isWiredTiger. load("jstests/libs/analyze_plan.js"); // For 'aggPlanHasStage' and other explain helpers. load("jstests/libs/fixture_helpers.js"); // For 'isMongos' and 'isSharded'. const coll = db.optimize_away_pipeline; coll.drop(); assert.writeOK(coll.insert({_id: 1, x: 10})); assert.writeOK(coll.insert({_id: 2, x: 20})); assert.writeOK(coll.insert({_id: 3, x: 30})); // Asserts that the give pipeline has *not* been optimized away and the request is answered // using the aggregation module. There should be pipeline stages present in the explain output. // The functions also asserts that a query stage passed in the 'stage' argument is present in // the explain output. If 'expectedResult' is provided, the pipeline is executed and the // returned result as validated agains the expected result without respecting the order of the // documents. If 'preserveResultOrder' is 'true' - the order is respected. 
// Verifies through explain that 'pipeline' is executed by the aggregation layer.
//
// Options (all optional, passed as a single object):
//   pipeline            - the aggregation pipeline to explain and, optionally, to run on 'coll'.
//   pipelineOptions     - options forwarded to both explain().aggregate() and aggregate().
//   expectedStage       - stage name that aggPlanHasStage() must report for the explain output.
//   expectedResult      - when truthy, the pipeline is executed and its output is compared with
//                         this array.
//   preserveResultOrder - compare with orderedArrayEq() instead of arrayEq().
//
// Returns the explain output so that callers can make further assertions on it.
function assertPipelineUsesAggregation({
    pipeline = [],
    pipelineOptions = {},
    expectedStage = null,
    expectedResult = null,
    preserveResultOrder = false
} = {}) {
    const explainOutput = coll.explain().aggregate(pipeline, pipelineOptions);

    assert(isAggregationPlan(explainOutput),
           "Expected pipeline " + tojsononeline(pipeline) +
               " to use an aggregation framework in the explain output: " +
               tojson(explainOutput));
    assert(!isQueryPlan(explainOutput),
           "Expected pipeline " + tojsononeline(pipeline) +
               " *not* to use a query layer at the root level in the explain output: " +
               tojson(explainOutput));

    // The query-layer part of the plan is reported under a $cursor stage or, failing that, a
    // $geoNearCursor stage. Both lookups are guarded: if neither stage is found, the assertion
    // below must fail with its descriptive message instead of a TypeError being raised while
    // dereferencing a missing stage.
    const cursorStage = getAggPlanStage(explainOutput, "$cursor");
    let cursor;
    if (cursorStage) {
        cursor = cursorStage.$cursor;
    } else {
        const geoNearStage = getAggPlanStage(explainOutput, "$geoNearCursor");
        cursor = geoNearStage ? geoNearStage.$geoNearCursor : null;
    }
    assert(cursor,
           "Expected pipeline " + tojsononeline(pipeline) + " to include a $cursor " +
               " stage in the explain output: " + tojson(explainOutput));

    assert(cursor.queryPlanner.optimizedPipeline === undefined,
           "Expected pipeline " + tojsononeline(pipeline) + " to *not* include an " +
               "'optimizedPipeline' field in the explain output: " + tojson(explainOutput));
    assert(aggPlanHasStage(explainOutput, expectedStage),
           "Expected pipeline " + tojsononeline(pipeline) + " to include a " + expectedStage +
               " stage in the explain output: " + tojson(explainOutput));

    if (expectedResult) {
        const actualResult = coll.aggregate(pipeline, pipelineOptions).toArray();
        const resultsMatch = preserveResultOrder ? orderedArrayEq(actualResult, expectedResult)
                                                 : arrayEq(actualResult, expectedResult);
        // Report both sides of the comparison so that a failure can be diagnosed from the log.
        assert(resultsMatch,
               "Expected pipeline " + tojsononeline(pipeline) + " to return " +
                   tojson(expectedResult) + (preserveResultOrder ? " in that order" : "") +
                   ", but got: " + tojson(actualResult));
    }

    return explainOutput;
}

// Asserts that the given pipeline has been optimized away and the request is answered using
// just the query module. There should be no pipeline stages present in the explain output.
// The function also asserts that the query stage passed in the 'expectedStage' argument is
// present in the explain output.
// If 'expectedResult' is provided, the pipeline is executed and the returned result is validated
// against the expected result without respecting the order of the documents. If
// 'preserveResultOrder' is 'true', the order is respected.
//
// Options (all optional, passed as a single object):
//   pipeline            - the aggregation pipeline to explain and, optionally, to run on 'coll'.
//   pipelineOptions     - options forwarded to both explain().aggregate() and aggregate().
//   expectedStage       - stage name that planHasStage() must report for the explain output.
//   expectedResult      - when truthy, the pipeline is executed and its output is compared with
//                         this array.
//   preserveResultOrder - compare with orderedArrayEq() instead of arrayEq().
//
// Returns the explain output so that callers can make further assertions on it.
function assertPipelineDoesNotUseAggregation({
    pipeline = [],
    pipelineOptions = {},
    expectedStage = null,
    expectedResult = null,
    preserveResultOrder = false
} = {}) {
    const explainOutput = coll.explain().aggregate(pipeline, pipelineOptions);

    assert(!isAggregationPlan(explainOutput),
           "Expected pipeline " + tojsononeline(pipeline) +
               " *not* to use an aggregation framework in the explain output: " +
               tojson(explainOutput));
    assert(isQueryPlan(explainOutput),
           "Expected pipeline " + tojsononeline(pipeline) +
               " to use a query layer at the root level in the explain output: " +
               tojson(explainOutput));

    // The 'optimizedPipeline' flag is checked either on every entry of 'shards', when the
    // explain output has that field, or on the top-level 'queryPlanner' section otherwise.
    const missingFlagMsg = "Expected pipeline " + tojsononeline(pipeline) + " to include an " +
        "'optimizedPipeline' field in the explain output: " + tojson(explainOutput);
    if (explainOutput.hasOwnProperty("shards")) {
        for (const shard of Object.keys(explainOutput.shards)) {
            assert(explainOutput.shards[shard].queryPlanner.optimizedPipeline === true,
                   missingFlagMsg);
        }
    } else {
        assert(explainOutput.queryPlanner.optimizedPipeline === true, missingFlagMsg);
    }

    assert(planHasStage(db, explainOutput, expectedStage),
           "Expected pipeline " + tojsononeline(pipeline) + " to include a " + expectedStage +
               " stage in the explain output: " + tojson(explainOutput));

    if (expectedResult) {
        const actualResult = coll.aggregate(pipeline, pipelineOptions).toArray();
        const resultsMatch = preserveResultOrder ? orderedArrayEq(actualResult, expectedResult)
                                                 : arrayEq(actualResult, expectedResult);
        // Report both sides of the comparison so that a failure can be diagnosed from the log.
        assert(resultsMatch,
               "Expected pipeline " + tojsononeline(pipeline) + " to return " +
                   tojson(expectedResult) + (preserveResultOrder ? " in that order" : "") +
                   ", but got: " + tojson(actualResult));
    }

    return explainOutput;
}

// Test that getMore works with the optimized query.
// Runs 'command' with db.runCommand(), wraps the successful reply in a DBCommandCursor created
// with a batch size of 1, reads the cursor to the end and asserts that the documents equal
// 'expectedResult'. The comparison uses arrayEq(), so document order is not significant.
//
// Options (passed as a single object):
//   command        - the command document to run against 'db'.
//   expectedResult - the array of documents the cursor is expected to produce.
function testGetMore({command = null, expectedResult = null} = {}) {
    const documents =
        new DBCommandCursor(db, assert.commandWorked(db.runCommand(command)), 1 /* batchsize */)
            .toArray();
    // Report both sides of the comparison so that a failure can be diagnosed from the log.
    assert(arrayEq(documents, expectedResult),
           "Expected command " + tojsononeline(command) + " to return " +
               tojson(expectedResult) + ", but got: " + tojson(documents));
}

// Basic pipelines.

// Test basic scenarios when a pipeline has a single $cursor stage or can be collapsed into a
// single cursor stage.
assertPipelineDoesNotUseAggregation({
    pipeline: [],
    expectedStage: "COLLSCAN",
    expectedResult: [{_id: 1, x: 10}, {_id: 2, x: 20}, {_id: 3, x: 30}]
});
assertPipelineDoesNotUseAggregation(
    {pipeline: [{$match: {x: 20}}], expectedStage: "COLLSCAN", expectedResult: [{_id: 2, x: 20}]});

// Pipelines with a collation.

// Test a simple pipeline with a case-insensitive collation. The extra document is inserted only
// for this case and removed again right after it.
assert.writeOK(coll.insert({_id: 4, x: 40, b: "abc"}));
assertPipelineDoesNotUseAggregation({
    pipeline: [{$match: {b: "ABC"}}],
    pipelineOptions: {collation: {locale: "en_US", strength: 2}},
    expectedStage: "COLLSCAN",
    expectedResult: [{_id: 4, x: 40, b: "abc"}]
});
assert.commandWorked(coll.deleteOne({_id: 4}));

// Pipelines with covered queries.

// We can collapse a covered query into a single $cursor when $project and $sort are present and
// the latter is near the front of the pipeline. Skip this test in sharded modes as we cannot
// correctly handle explain output in plan analyzer helper functions.
assert.commandWorked(coll.createIndex({x: 1}));
assertPipelineDoesNotUseAggregation({
    pipeline: [{$sort: {x: 1}}, {$project: {x: 1, _id: 0}}],
    expectedStage: "IXSCAN",
    expectedResult: [{x: 10}, {x: 20}, {x: 30}],
    preserveResultOrder: true
});
assertPipelineDoesNotUseAggregation({
    pipeline: [{$match: {x: {$gte: 20}}}, {$sort: {x: 1}}, {$project: {x: 1, _id: 0}}],
    expectedStage: "IXSCAN",
    expectedResult: [{x: 20}, {x: 30}],
    preserveResultOrder: true
});

// TODO: SERVER-36723 We cannot collapse if there is a $limit stage though.
assertPipelineUsesAggregation({
    pipeline:
        [{$match: {x: {$gte: 20}}}, {$sort: {x: 1}}, {$limit: 1}, {$project: {x: 1, _id: 0}}],
    expectedStage: "IXSCAN",
    expectedResult: [{x: 20}]
});
assert.commandWorked(coll.dropIndexes());

// Pipelines which cannot be optimized away.

// TODO SERVER-40254: Uncovered queries. A fourth document with a nested field is added for the
// duration of these cases so that the dotted-path projections have something to return.
assert.writeOK(coll.insert({_id: 4, x: 40, a: {b: "ab1"}}));
const uncoveredQueryCases = [
    {
        pipeline: [{$project: {x: 1, _id: 0}}],
        expectedResult: [{x: 10}, {x: 20}, {x: 30}, {x: 40}]
    },
    {
        pipeline: [{$match: {x: 20}}, {$project: {x: 1, _id: 0}}],
        expectedResult: [{x: 20}]
    },
    {
        pipeline: [{$project: {x: 1, "a.b": 1, _id: 0}}],
        expectedResult: [{x: 10}, {x: 20}, {x: 30}, {x: 40, a: {b: "ab1"}}]
    },
    {
        pipeline: [{$match: {x: 40}}, {$project: {"a.b": 1, _id: 0}}],
        expectedResult: [{a: {b: "ab1"}}]
    },
];
for (const {pipeline, expectedResult} of uncoveredQueryCases) {
    assertPipelineUsesAggregation({pipeline, expectedStage: "COLLSCAN", expectedResult});
}
assert.commandWorked(coll.deleteOne({_id: 4}));

// TODO SERVER-36723: $limit stage is not supported yet.
assertPipelineUsesAggregation({
    pipeline: [{$match: {x: 20}}, {$limit: 1}],
    expectedStage: "COLLSCAN",
    expectedResult: [{_id: 2, x: 20}]
});

// TODO SERVER-36723: $skip stage is not supported yet.
assertPipelineUsesAggregation({
    pipeline: [{$match: {x: {$gte: 20}}}, {$skip: 1}],
    expectedStage: "COLLSCAN",
    expectedResult: [{_id: 3, x: 30}]
});

// We cannot collapse a $project stage if it has a complex pipeline expression. Only the explain
// output is checked for these pipelines; no expected result is supplied.
const computedProjectionPipelines = [
    [{$project: {x: {$substr: ["$y", 0, 1]}, _id: 0}}],
    [{$match: {x: 20}}, {$project: {x: {$substr: ["$y", 0, 1]}, _id: 0}}],
];
for (const pipeline of computedProjectionPipelines) {
    assertPipelineUsesAggregation({pipeline, expectedStage: "COLLSCAN"});
}

// We cannot optimize away a pipeline if there are stages which have no equivalent in the
// find command.
const noFindEquivalentCases = [
    {
        pipeline: [{$match: {x: {$gte: 20}}}, {$count: "count"}],
        expectedResult: [{count: 2}]
    },
    {
        pipeline: [{$match: {x: {$gte: 20}}}, {$group: {_id: "null", s: {$sum: "$x"}}}],
        expectedResult: [{_id: "null", s: 50}]
    },
];
for (const {pipeline, expectedResult} of noFindEquivalentCases) {
    assertPipelineUsesAggregation({pipeline, expectedStage: "COLLSCAN", expectedResult});
}

// TODO SERVER-40253: We cannot optimize away text search queries.
assert.commandWorked(coll.createIndex({y: "text"}));
assertPipelineUsesAggregation({
    pipeline: [{$match: {$text: {$search: "abc"}}}],
    expectedStage: "IXSCAN"
});
assert.commandWorked(coll.dropIndexes());

// We cannot optimize away geo near queries.
assert.commandWorked(coll.createIndex({y: "2d"}));
assertPipelineUsesAggregation({
    pipeline: [{$geoNear: {near: [0, 0], distanceField: "y", spherical: true}}],
    expectedStage: "GEO_NEAR_2D"
});
assert.commandWorked(coll.dropIndexes());

// getMore cases.

// Test getMore on a collection with an optimized away pipeline. Every command asks for a first
// batch of a single document.
const collectionGetMoreCases = [
    {
        pipeline: [],
        expectedResult: [{_id: 1, x: 10}, {_id: 2, x: 20}, {_id: 3, x: 30}]
    },
    {
        pipeline: [{$match: {x: {$gte: 20}}}],
        expectedResult: [{_id: 2, x: 20}, {_id: 3, x: 30}]
    },
    {
        pipeline: [{$match: {x: {$gte: 20}}}, {$project: {x: 1, _id: 0}}],
        expectedResult: [{x: 20}, {x: 30}]
    },
];
for (const {pipeline, expectedResult} of collectionGetMoreCases) {
    testGetMore({
        command: {aggregate: coll.getName(), pipeline, cursor: {batchSize: 1}},
        expectedResult
    });
}

// Test getMore on a view with an optimized away pipeline. Since views cannot be created when
// implicit sharded collection mode is on, this test will be run only on a non-sharded
// collection.
let view; if (!FixtureHelpers.isSharded(coll)) { view = db.optimize_away_pipeline_view; view.drop(); assert.commandWorked(db.createView(view.getName(), coll.getName(), [])); testGetMore({ command: {find: view.getName(), filter: {}, batchSize: 1}, expectedResult: [{_id: 1, x: 10}, {_id: 2, x: 20}, {_id: 3, x: 30}] }); } // Test getMore puts a correct namespace into profile data for a colletion with optimized away // pipeline. Cannot be run on mongos as profiling can be enabled only on mongod. Also profiling // is supported on WiredTiger only. if (!FixtureHelpers.isMongos(db) && isWiredTiger(db)) { db.system.profile.drop(); db.setProfilingLevel(2); testGetMore({ command: { aggregate: coll.getName(), pipeline: [{$match: {x: 10}}], cursor: {batchSize: 1}, comment: 'optimize_away_pipeline' }, expectedResult: [{_id: 1, x: 10}] }); db.setProfilingLevel(0); let profile = db.system.profile.find({}, {op: 1, ns: 1, comment: 'optimize_away_pipeline'}) .sort({ts: 1}) .toArray(); assert(arrayEq( profile, [{op: "command", ns: coll.getFullName()}, {op: "getmore", ns: coll.getFullName()}])); // Test getMore puts a correct namespace into profile data for a view with an optimized away // pipeline. if (!FixtureHelpers.isSharded(coll)) { db.system.profile.drop(); db.setProfilingLevel(2); testGetMore({ command: { find: view.getName(), filter: {x: 10}, batchSize: 1, comment: 'optimize_away_pipeline' }, expectedResult: [{_id: 1, x: 10}] }); db.setProfilingLevel(0); profile = db.system.profile.find({}, {op: 1, ns: 1, comment: 'optimize_away_pipeline'}) .sort({ts: 1}) .toArray(); assert(arrayEq( profile, [{op: "query", ns: view.getFullName()}, {op: "getmore", ns: view.getFullName()}])); } } }());