diff options
Diffstat (limited to 'jstests')
4 files changed, 593 insertions, 7 deletions
diff --git a/jstests/sharding/analyze_shard_key/libs/query_sampling_util.js b/jstests/sharding/analyze_shard_key/libs/query_sampling_util.js index 51bc566f36c..10c9e8da807 100644 --- a/jstests/sharding/analyze_shard_key/libs/query_sampling_util.js +++ b/jstests/sharding/analyze_shard_key/libs/query_sampling_util.js @@ -27,16 +27,30 @@ var QuerySamplingUtil = (function() { } /** - * Waits for the given mongos to have one active collection for query sampling. + * Waits for the given node to have one active collection for query sampling. */ - function waitForActiveSampling(mongosConn) { + function waitForActiveSampling(node) { assert.soon(() => { - const res = assert.commandWorked(mongosConn.adminCommand({serverStatus: 1})); + const res = assert.commandWorked(node.adminCommand({serverStatus: 1})); return res.queryAnalyzers.activeCollections == 1; }); } /** + * Waits for all shard nodes to have one active collection for query sampling. + */ + function waitForActiveSamplingOnAllShards(st) { + st._rs.forEach(rs => { + rs.nodes.forEach(node => { + assert.soon(() => { + const res = assert.commandWorked(node.adminCommand({serverStatus: 1})); + return res.queryAnalyzers.activeCollections == 1; + }); + }); + }); + } + + /** * Returns true if 'subsetObj' is a sub object of 'supersetObj'. That is, every key that exists * in 'subsetObj' also exists in 'supersetObj' and the values of that key in the two objects are * equal. @@ -133,18 +147,25 @@ var QuerySamplingUtil = (function() { {actualSampledQueryDocs, expectedSampledQueryDocs}); for (let {filter, shardNames, cmdName, cmdObj, diff} of expectedSampledQueryDocs) { + if (!filter) { + // The filer is not specified so skip verifying it. + continue; + } let shardName = null; for (let rs of st._rs) { const primary = rs.test.getPrimary(); - const queryDoc = primary.getCollection(kSampledQueriesNs).findOne(filter); + const queryDocs = primary.getCollection(kSampledQueriesNs).find(filter).toArray(); if (shardName) { - assert.eq(queryDoc, - null, + assert.eq(queryDocs.length, + 0, "Found a sampled query on more than one shard " + tojson({shardNames: [shardName, rs.test.name], cmdName, cmdObj})); continue; - } else if (queryDoc) { + } else if (queryDocs.length > 0) { + assert.eq(queryDocs.length, 1, queryDocs); + const queryDoc = queryDocs[0]; + shardName = rs.test.name; assert(shardNames.includes(shardName), "Found a sampled query on an unexpected shard " + @@ -170,6 +191,18 @@ var QuerySamplingUtil = (function() { assert.eq(coll.find({ns}).itcount(), 0); } + function clearSampledQueryCollection(primary) { + const coll = primary.getCollection(kSampledQueriesNs); + assert.commandWorked(coll.remove({})); + } + + function clearSampledQueryCollectionOnAllShards(st) { + for (let rs of st._rs) { + const primary = rs.test.getPrimary(); + clearSampledQueryCollection(primary); + } + } + /** * Waits for the config.sampledQueriesDiff collection to have a document with _id equal to * 'sampleId' for the collection 'ns', and then asserts that the diff in that document matches @@ -210,9 +243,11 @@ var QuerySamplingUtil = (function() { generateRandomCollation, makeCmdObjIgnoreSessionInfo, waitForActiveSampling, + waitForActiveSamplingOnAllShards, assertSoonSampledQueryDocuments, assertSoonSampledQueryDocumentsAcrossShards, assertNoSampledQueryDocuments, + clearSampledQueryCollectionOnAllShards, assertSoonSingleSampledDiffDocument, assertNoSampledDiffDocuments, clearSampledDiffCollection diff --git a/jstests/sharding/analyze_shard_key/libs/sample_nested_agg_queries_common.js b/jstests/sharding/analyze_shard_key/libs/sample_nested_agg_queries_common.js new file mode 100644 index 00000000000..38a39b3ed2e --- /dev/null +++ b/jstests/sharding/analyze_shard_key/libs/sample_nested_agg_queries_common.js @@ -0,0 +1,305 @@ +/** + * Utilities for testing basic support for sampling nested aggregate queries (i.e. ones inside + * $lookup, $graphLookup, $unionWith) on a sharded cluster. + */ + +load("jstests/libs/sbe_util.js"); // For checkSBEEnabled. + +// Make the periodic jobs for refreshing sample rates and writing sampled queries and diffs have a +// period of 1 second to speed up the test. +const queryAnalysisSamplerConfigurationRefreshSecs = 1; +const queryAnalysisWriterIntervalSecs = 1; + +const outerAggTestCases = [ + // The test cases for singly-nested aggregate queries. + { + name: "lookup_custom_pipeline", + supportCustomPipeline: true, + makeOuterPipelineFunc: (localCollName, foreignCollName, pipeline) => { + return [{$lookup: {from: foreignCollName, as: "joined", pipeline}}]; + }, + requireShardToRouteFunc: (db, collName, isShardedColl) => true + }, + { + name: "lookup_non_custom_pipeline", + supportCustomPipeline: false, + makeOuterPipelineFunc: (localCollName, foreignCollName, pipeline) => { + return [ + {$lookup: {from: foreignCollName, as: "joined", localField: "a", foreignField: "x"}} + ]; + }, + requireShardToRouteFunc: (db, collName, isShardedColl) => { + // When SBE is enabled, if the collection is not sharded and not clustered, the shard + // will not create a separate pipeline to execute the inner side of a $lookup stage so + // there is no nested aggregate query to route. + const listCollectionRes = + assert.commandWorked(db.runCommand({listCollections: 1, filter: {name: collName}})); + const isClusteredColl = + listCollectionRes.cursor.firstBatch[0].options.hasOwnProperty("clusteredIndex"); + const isEligibleForSBELookupPushdown = + checkSBEEnabled(db) && !isShardedColl && !isClusteredColl; + return !isEligibleForSBELookupPushdown; + } + }, + { + name: "unionWith", + supportCustomPipeline: true, + makeOuterPipelineFunc: (localCollName, foreignCollName, pipeline) => { + return [{$unionWith: {coll: foreignCollName, pipeline}}]; + }, + requireShardToRouteFunc: (db, collName, isShardedColl) => true + }, + { + name: "graphLookup", + supportCustomPipeline: false, + makeOuterPipelineFunc: (localCollName, foreignCollName) => { + return [{ + $graphLookup: { + from: foreignCollName, + startWith: "$x", + connectFromField: "x", + connectToField: "a", + maxDepth: 1, + as: "connected" + } + }]; + }, + requireShardToRouteFunc: (db, collName, isShardedColl) => true + }, + // The test cases for doubly-nested aggregate queries. + { + name: "lookup+lookup", + supportCustomPipeline: true, + makeOuterPipelineFunc: (localCollName, foreignCollName, pipeline) => { + return [{ + $lookup: { + from: localCollName, + as: "joined", + pipeline: [{ + $lookup: { + from: foreignCollName, + as: "joined", + pipeline + } + }] + } + }]; + }, + requireShardToRouteFunc: (db, collName, isShardedColl) => true + }, + { + name: "lookup+unionWith", + supportCustomPipeline: true, + makeOuterPipelineFunc: (localCollName, foreignCollName, pipeline) => { + return [{ + $lookup: { + from: localCollName, + as: "joined", + pipeline: [{ + $unionWith: { + coll: foreignCollName, + pipeline + } + }] + } + }]; + }, + requireShardToRouteFunc: (db, collName, isShardedColl) => true + }, + { + name: "lookup+graphLookUp", + supportCustomPipeline: false, + makeOuterPipelineFunc: (localCollName, foreignCollName, pipeline) => { + return [{ + $lookup: { + from: localCollName, + as: "joined", + pipeline: [{ + $graphLookup: { + from: foreignCollName, + startWith: "$x", + connectFromField: "x", + connectToField: "a", + maxDepth: 1, + as: "connected" + } + }] + } + }]; + }, + requireShardToRouteFunc: (db, collName, isShardedColl) => true + }, + { + name: "unionWith+lookup", + supportCustomPipeline: true, + makeOuterPipelineFunc: (localCollName, foreignCollName, pipeline) => { + return [{ + $unionWith: { + coll: localCollName, + pipeline: [{$lookup: {from: foreignCollName, as: "joined", pipeline}}] + } + }]; + }, + requireShardToRouteFunc: (db, collName, isShardedColl) => true + }, + { + name: "unionWith+unionWith", + supportCustomPipeline: true, + makeOuterPipelineFunc: (localCollName, foreignCollName, pipeline) => { + return [{ + $unionWith: { + coll: localCollName, + pipeline: [{$unionWith: {coll: foreignCollName, pipeline}}] + } + }]; + }, + requireShardToRouteFunc: (db, collName, isShardedColl) => true + }, + { + name: "unionWith+graphLookup", + supportCustomPipeline: false, + makeOuterPipelineFunc: (localCollName, foreignCollName, pipeline) => { + return [{ + $unionWith: { + coll: localCollName, + pipeline: [{ + $graphLookup: { + from: foreignCollName, + startWith: "$x", + connectFromField: "x", + connectToField: "a", + maxDepth: 1, + as: "connected" + } + }] + } + }]; + }, + requireShardToRouteFunc: (db, collName, isShardedColl) => true, + }, + { + name: "facet", + supportCustomPipeline: false, + makeOuterPipelineFunc: (localCollName, foreignCollName, pipeline) => { + return [{$facet: {foo: [{$match: {}}]}}]; + }, + requireShardToRouteFunc: (db, collName, isShardedColl) => false, + } +]; + +const innerAggTestCases = [ + { + // The filter is in the first stage. + containInitialFilter: true, + makeInnerPipelineFunc: (filter) => { + return [{$match: filter}]; + } + }, + { + // The filter is not in the first stage but the stage that it is in is moveable. + containInitialFilter: true, + makeInnerPipelineFunc: (filter) => { + return [{$sort: {x: -1}}, {$match: filter}]; + } + }, + { + // The filter is not in the first stage and the stage that it is in is not moveable. + containInitialFilter: false, + makeInnerPipelineFunc: (filter) => { + return [{$_internalInhibitOptimization: {}}, {$match: filter}]; + } + } +]; + +/** + * Tests that a nested aggregate query run internally by an aggregation stage that takes in a + * "pipeline" is sampled correctly. + */ +function testCustomInnerPipeline(makeOuterPipelineFunc, + makeInnerPipelineFunc, + containInitialFilter, + st, + dbName, + localCollName, + foreignCollName, + filter, + shardNames, + explain, + requireShardToRoute) { + const mongosDB = st.s.getDB(dbName); + const foreignNs = dbName + "." + foreignCollName; + const foreignCollUuid = QuerySamplingUtil.getCollectionUuid(mongosDB, foreignCollName); + + let expectedSampledQueryDocs = []; + + const collation = QuerySamplingUtil.generateRandomCollation(); + const innerPipeline = makeInnerPipelineFunc(filter); + const outerPipeline = makeOuterPipelineFunc(localCollName, foreignCollName, innerPipeline); + const originalCmdObj = + {aggregate: localCollName, pipeline: outerPipeline, collation, cursor: {}}; + + jsTest.log("Testing command " + tojsononeline({originalCmdObj, explain})); + assert.commandWorked(mongosDB.runCommand(explain ? {explain: originalCmdObj} : originalCmdObj)); + + // Queries that are part of an 'explain' or that do not require shards to route should not get + // sampled. + if (!explain && requireShardToRoute) { + const expectedFilter = containInitialFilter ? filter : {}; + const expectedDoc = { + cmdName: "aggregate", + cmdObj: {filter: expectedFilter, collation}, + shardNames + }; + if (filter) { + expectedDoc.filter = {"cmd.filter": expectedFilter}; + } + expectedSampledQueryDocs.push(expectedDoc); + } + + sleep(queryAnalysisWriterIntervalSecs * 1000); + QuerySamplingUtil.assertSoonSampledQueryDocumentsAcrossShards( + st, foreignNs, foreignCollUuid, ["aggregate"], expectedSampledQueryDocs); + QuerySamplingUtil.clearSampledQueryCollectionOnAllShards(st); +} + +/** + * Tests that a nested aggregate query run internally by an aggregation stage that does not take in + * a "pipeline" is sampled correctly. + */ +function testNoCustomInnerPipeline(makeOuterPipelineFunc, + st, + dbName, + localCollName, + foreignCollName, + explain, + requireShardToRoute) { + const mongosDB = st.s.getDB(dbName); + const foreignNs = dbName + "." + foreignCollName; + const foreignCollUuid = QuerySamplingUtil.getCollectionUuid(mongosDB, foreignCollName); + + let expectedSampledQueryDocs = []; + + const collation = QuerySamplingUtil.generateRandomCollation(); + const outerPipeline = makeOuterPipelineFunc(localCollName, foreignCollName); + const originalCmdObj = + {aggregate: localCollName, pipeline: outerPipeline, collation, cursor: {}}; + + jsTest.log("Testing command " + tojsononeline({originalCmdObj, explain})); + assert.commandWorked(mongosDB.runCommand(explain ? {explain: originalCmdObj} : originalCmdObj)); + + // Queries that are part of an 'explain' or that do not require shards to route should not get + // sampled. + if (!explain && requireShardToRoute) { + // Out of the aggregation stages above, the only stages that doesn't take in a custom + // pipeline are $graphLookup and $lookup (without a "pipeline" field). To avoid relying on + // the current format of the $match filter that they internally construct, skip verifying + // the filter and only verify that the query is present in the config.sampledQueries + // collection. + expectedSampledQueryDocs.push({}); + } + + sleep(queryAnalysisWriterIntervalSecs * 1000); + QuerySamplingUtil.assertSoonSampledQueryDocumentsAcrossShards( + st, foreignNs, foreignCollUuid, ["aggregate"], expectedSampledQueryDocs); + QuerySamplingUtil.clearSampledQueryCollectionOnAllShards(st); +} diff --git a/jstests/sharding/analyze_shard_key/sample_nested_agg_queries_sharded.js b/jstests/sharding/analyze_shard_key/sample_nested_agg_queries_sharded.js new file mode 100644 index 00000000000..58efb7c3565 --- /dev/null +++ b/jstests/sharding/analyze_shard_key/sample_nested_agg_queries_sharded.js @@ -0,0 +1,143 @@ +/** + * Tests basic support for sampling nested aggregate queries (i.e. ones inside $lookup, + * $graphLookup, $unionWith) against a sharded collection on a sharded cluster. + * + * @tags: [requires_fcv_62, featureFlagAnalyzeShardKey] + */ +(function() { +"use strict"; + +load("jstests/sharding/analyze_shard_key/libs/sample_nested_agg_queries_common.js"); +load("jstests/sharding/analyze_shard_key/libs/query_sampling_util.js"); + +const st = new ShardingTest({ + shards: 3, + rs: { + nodes: 2, + setParameter: + {queryAnalysisSamplerConfigurationRefreshSecs, queryAnalysisWriterIntervalSecs} + }, + // Disable query sampling on mongos to verify that the nested aggregate queries are sampled by + // the shard that routes them. + mongosOptions: + {setParameter: {"failpoint.disableQueryAnalysisSampler": tojson({mode: "alwaysOn"})}} +}); + +const dbName = "testDb"; +const localCollName = "testLocalColl"; +const foreignCollName = "testForeignColl"; +const foreignNs = dbName + "." + foreignCollName; +const mongosDB = st.s.getDB(dbName); + +// Set up the local collection. It needs to have at least one document. Otherwise, no nested +// aggregate queries would be issued. +assert.commandWorked(mongosDB.getCollection(localCollName).insert([{a: 0}])); + +// Set up the foreign collection. Make it have three chunks: +// shard0: [MinKey, 0] +// shard1: [0, 1000] +// shard1: [1000, MaxKey] +assert.commandWorked(st.s.adminCommand({enableSharding: dbName})); +st.ensurePrimaryShard(dbName, st.shard0.name); +assert.commandWorked(st.s.adminCommand({shardCollection: foreignNs, key: {x: 1}})); +assert.commandWorked(st.s.adminCommand({split: foreignNs, middle: {x: 0}})); +assert.commandWorked(st.s.adminCommand({split: foreignNs, middle: {x: 1000}})); +assert.commandWorked( + st.s.adminCommand({moveChunk: foreignNs, find: {x: 0}, to: st.shard1.shardName})); +assert.commandWorked( + st.s.adminCommand({moveChunk: foreignNs, find: {x: 1000}, to: st.shard2.name})); + +assert.commandWorked( + st.s.adminCommand({configureQueryAnalyzer: foreignNs, mode: "full", sampleRate: 1000})); +QuerySamplingUtil.waitForActiveSamplingOnAllShards(st); + +for (let {name, + makeOuterPipelineFunc, + requireShardToRouteFunc, + supportCustomPipeline} of outerAggTestCases) { + const requireShardToRoute = + requireShardToRouteFunc(mongosDB, foreignCollName, true /* isShardedColl */); + if (supportCustomPipeline) { + for (let {makeInnerPipelineFunc, containInitialFilter} of innerAggTestCases) { + const filter0 = {x: 1, name}; + // If the aggregation doesn't have an initial filter, it would be routed to all shards. + const shardNames0 = + containInitialFilter ? [st.rs1.name] : [st.rs0.name, st.rs1.name, st.rs2.name]; + testCustomInnerPipeline(makeOuterPipelineFunc, + makeInnerPipelineFunc, + containInitialFilter, + st, + dbName, + localCollName, + foreignCollName, + filter0, + shardNames0, + false /* explain */, + requireShardToRoute); + + const filter1 = {x: {$gte: 2}, name}; + // If the aggregation doesn't have an initial filter, it would be routed to all shards. + const shardNames1 = containInitialFilter ? [st.rs1.name, st.rs2.name] + : [st.rs0.name, st.rs1.name, st.rs2.name]; + testCustomInnerPipeline(makeOuterPipelineFunc, + makeInnerPipelineFunc, + containInitialFilter, + st, + dbName, + localCollName, + foreignCollName, + filter1, + shardNames1, + false /* explain */, + requireShardToRoute); + + const filter2 = {x: 3, name}; + const shardNames2 = []; + testCustomInnerPipeline(makeOuterPipelineFunc, + makeInnerPipelineFunc, + containInitialFilter, + st, + dbName, + localCollName, + foreignCollName, + filter2, + shardNames2, + true /* explain */, + requireShardToRoute); + + const filter3 = {x: {$gte: 4}, name}; + const shardNames3 = []; + testCustomInnerPipeline(makeOuterPipelineFunc, + makeInnerPipelineFunc, + containInitialFilter, + st, + dbName, + localCollName, + foreignCollName, + filter3, + shardNames3, + true /* explain */, + requireShardToRoute); + } + } else { + testNoCustomInnerPipeline(makeOuterPipelineFunc, + st, + dbName, + localCollName, + foreignCollName, + false /* explain */, + requireShardToRoute); + testNoCustomInnerPipeline(makeOuterPipelineFunc, + st, + dbName, + localCollName, + foreignCollName, + true /* explain */, + requireShardToRoute); + } +} + +assert.commandWorked(st.s.adminCommand({configureQueryAnalyzer: foreignNs, mode: "off"})); + +st.stop(); +})(); diff --git a/jstests/sharding/analyze_shard_key/sample_nested_agg_queries_unsharded.js b/jstests/sharding/analyze_shard_key/sample_nested_agg_queries_unsharded.js new file mode 100644 index 00000000000..5b4626a12f1 --- /dev/null +++ b/jstests/sharding/analyze_shard_key/sample_nested_agg_queries_unsharded.js @@ -0,0 +1,103 @@ +/** + * Tests basic support for sampling nested aggregate queries (i.e. inside $lookup, $graphLookup, + * $unionWith) against an unsharded collection on a sharded cluster. + * + * @tags: [requires_fcv_62, featureFlagAnalyzeShardKey] + */ +(function() { +"use strict"; + +load("jstests/sharding/analyze_shard_key/libs/sample_nested_agg_queries_common.js"); +load("jstests/sharding/analyze_shard_key/libs/query_sampling_util.js"); + +const st = new ShardingTest({ + shards: 2, + rs: { + nodes: 2, + setParameter: + {queryAnalysisSamplerConfigurationRefreshSecs, queryAnalysisWriterIntervalSecs} + }, + // Disable query sampling on mongos to verify that the nested aggregate queries are sampled by + // the shard that routes them. + mongosOptions: + {setParameter: {"failpoint.disableQueryAnalysisSampler": tojson({mode: "alwaysOn"})}} +}); + +const dbName = "testDb"; +const localCollName = "testLocalColl"; +const foreignCollName = "testForeignColl"; +const foreignNs = dbName + "." + foreignCollName; +const mongosDB = st.s.getDB(dbName); + +// Set up the local collection. It needs to have at least one document. Otherwise, no nested +// aggregate queries will be issued. +assert.commandWorked(mongosDB.getCollection(localCollName).insert([{a: 0}])); + +// Set up the foreign collection. +assert.commandWorked(st.s.adminCommand({enableSharding: dbName})); +st.ensurePrimaryShard(dbName, st.shard0.name); +assert.commandWorked(mongosDB.createCollection(foreignCollName)); + +assert.commandWorked( + st.s.adminCommand({configureQueryAnalyzer: foreignNs, mode: "full", sampleRate: 1000})); +QuerySamplingUtil.waitForActiveSamplingOnAllShards(st); + +// The foreign collection is unsharded so all documents are on the primary shard. +const shardNames = [st.rs0.name]; + +for (let {name, + makeOuterPipelineFunc, + requireShardToRouteFunc, + supportCustomPipeline} of outerAggTestCases) { + const requireShardToRoute = + requireShardToRouteFunc(mongosDB, foreignCollName, false /* isShardedColl */); + if (supportCustomPipeline) { + for (let {makeInnerPipelineFunc, containInitialFilter} of innerAggTestCases) { + const filter0 = {x: 1, name}; + testCustomInnerPipeline(makeOuterPipelineFunc, + makeInnerPipelineFunc, + containInitialFilter, + st, + dbName, + localCollName, + foreignCollName, + filter0, + shardNames, + false /* explain */, + requireShardToRoute); + + const filter1 = {x: 2, name}; + testCustomInnerPipeline(makeOuterPipelineFunc, + makeInnerPipelineFunc, + containInitialFilter, + st, + dbName, + localCollName, + foreignCollName, + filter1, + shardNames, + true /* explain */, + requireShardToRoute); + } + } else { + testNoCustomInnerPipeline(makeOuterPipelineFunc, + st, + dbName, + localCollName, + foreignCollName, + false /* explain */, + requireShardToRoute); + testNoCustomInnerPipeline(makeOuterPipelineFunc, + st, + dbName, + localCollName, + foreignCollName, + true /* explain */, + requireShardToRoute); + } +} + +assert.commandWorked(st.s.adminCommand({configureQueryAnalyzer: foreignNs, mode: "off"})); + +st.stop(); +})(); |