From 87160f876c6fb94f5d03062b2caee57539ec5d8e Mon Sep 17 00:00:00 2001
From: "liubov.molchanova"
Date: Wed, 17 May 2023 08:16:48 +0000
Subject: Revert "SERVER-76427: Rename $telemetry to $queryStats"

This reverts commit d646e44b7801a3e5b3230bbae7dcfe05a5ed8707.
---
 .../resmokeconfig/suites/telemetry_passthrough.yml |    2 +-
 jstests/auth/lib/commands_lib.js                   |    6 +-
 jstests/libs/telemetry_utils.js                    |    6 +-
 .../queryStats/application_name_find.js            |   39 -
 .../queryStats/clear_query_stats_store.js          |   45 -
 ...ocumentSourceQueryStats_redaction_parameters.js |  100 --
 .../feature_flag_off_sampling_rate_on.js           |   54 -
 .../queryStats/query_stats_collect_on_mongos.js    |  277 -----
 .../queryStats/query_stats_feature_flag.js         |   34 -
 .../noPassthrough/queryStats/query_stats_key.js    |  111 --
 .../query_stats_metrics_across_getMore_calls.js    |  159 ---
 .../queryStats/query_stats_redact_find_cmd.js      |   69 --
 .../queryStats/query_stats_sampling_rate.js        |   38 -
 .../query_stats_server_status_metrics.js           |  186 ----
 .../queryStats/query_stats_upgrade.js              |   43 -
 .../redact_queries_with_nonobject_fields.js        |   76 --
 .../telemetry/application_name_find.js             |   39 +
 .../telemetry/clear_telemetry_store.js             |   46 +
 ...documentSourceTelemetry_redaction_parameters.js |  100 ++
 .../telemetry/feature_flag_off_sampling_rate_on.js |   54 +
 jstests/noPassthrough/telemetry/query_stats_key.js |  111 ++
 .../redact_queries_with_nonobject_fields.js        |   76 ++
 .../telemetry/telemetry_collect_on_mongos.js       |  275 +++++
 .../telemetry/telemetry_feature_flag.js            |   34 +
 .../telemetry_metrics_across_getMore_calls.js      |  159 +++
 .../telemetry/telemetry_redact_find_cmd.js         |   69 ++
 .../telemetry/telemetry_sampling_rate.js           |   38 +
 .../telemetry/telemetry_server_status_metrics.js   |  190 ++++
 .../noPassthrough/telemetry/telemetry_upgrade.js   |   43 +
 .../telemetry_configuration.js                     |    5 +-
 src/mongo/db/auth/action_type.idl                  |    2 +-
 src/mongo/db/auth/builtin_roles.yml                |    2 +-
 src/mongo/db/clientcursor.cpp                      |   36 +-
 src/mongo/db/clientcursor.h                        |   14 +-
 src/mongo/db/commands/find_cmd.cpp                 |   21 +-
 src/mongo/db/commands/getmore_cmd.cpp              |    2 +-
 src/mongo/db/commands/run_aggregate.cpp            |   18 +-
 src/mongo/db/curop.h                               |    8 +-
 src/mongo/db/cursor_manager.cpp                    |    8 +-
 src/mongo/db/pipeline/SConscript                   |    4 +-
 .../db/pipeline/abt/document_source_visitor.cpp    |    2 +-
 src/mongo/db/pipeline/aggregate_command.idl        |    6 +-
 .../db/pipeline/aggregate_request_shapifier.cpp    |   12 +-
 .../db/pipeline/aggregate_request_shapifier.h      |   11 +-
 .../db/pipeline/document_source_query_stats.cpp    |  215 ----
 .../db/pipeline/document_source_query_stats.h      |  147 ---
 .../pipeline/document_source_query_stats_test.cpp  |   95 --
 .../db/pipeline/document_source_telemetry.cpp      |  215 ++++
 src/mongo/db/pipeline/document_source_telemetry.h  |  147 +++
 .../db/pipeline/document_source_telemetry_test.cpp |   95 ++
 .../document_source_visitor_registry_mongod.h      |    4 +-
 src/mongo/db/query/SConscript                      |    6 +-
 src/mongo/db/query/cqf_command_utils.cpp           |    2 +-
 src/mongo/db/query/find.cpp                        |    6 +-
 src/mongo/db/query/find_request_shapifier.cpp      |   12 +-
 src/mongo/db/query/find_request_shapifier.h        |   11 +-
 src/mongo/db/query/query_feature_flags.idl         |    6 +-
 src/mongo/db/query/query_knobs.idl                 |   28 +-
 src/mongo/db/query/query_shape.cpp                 |    2 +-
 src/mongo/db/query/query_shape.h                   |    2 +-
 src/mongo/db/query/query_stats.cpp                 |  555 ----
 src/mongo/db/query/query_stats.h                   |  224 ----
 src/mongo/db/query/query_stats_store_test.cpp      | 1164 --------------------
 src/mongo/db/query/query_stats_util.cpp            |   97 --
 src/mongo/db/query/query_stats_util.h              |   93 --
 src/mongo/db/query/request_shapifier.h             |   20 +-
 src/mongo/db/query/telemetry.cpp                   |  555 ++++++++++
 src/mongo/db/query/telemetry.h                     |  224 ++++
 src/mongo/db/query/telemetry_store_test.cpp        | 1163 +++++++++++++++++++
 src/mongo/db/query/telemetry_util.cpp              |   97 ++
 src/mongo/db/query/telemetry_util.h                |   93 ++
 src/mongo/s/commands/cluster_find_cmd.h            |   12 +-
 src/mongo/s/query/cluster_aggregate.cpp            |    4 +-
 src/mongo/s/query/cluster_aggregation_planner.cpp  |    8 +-
 src/mongo/s/query/cluster_client_cursor.h          |    4 +-
 src/mongo/s/query/cluster_client_cursor_impl.cpp   |   27 +-
 src/mongo/s/query/cluster_client_cursor_impl.h     |   10 +-
 src/mongo/s/query/cluster_client_cursor_mock.cpp   |    2 +-
 src/mongo/s/query/cluster_client_cursor_mock.h     |    2 +-
 src/mongo/s/query/cluster_cursor_manager.cpp       |   18 +-
 src/mongo/s/query/cluster_cursor_manager.h         |   10 +-
 src/mongo/s/query/cluster_find.cpp                 |    8 +-
 src/mongo/s/query/store_possible_cursor.cpp        |    4 +-
 83 files changed, 4008 insertions(+), 4009 deletions(-)
 delete mode 100644 jstests/noPassthrough/queryStats/application_name_find.js
 delete mode 100644 jstests/noPassthrough/queryStats/clear_query_stats_store.js
 delete mode 100644 jstests/noPassthrough/queryStats/documentSourceQueryStats_redaction_parameters.js
 delete mode 100644 jstests/noPassthrough/queryStats/feature_flag_off_sampling_rate_on.js
 delete mode 100644 jstests/noPassthrough/queryStats/query_stats_collect_on_mongos.js
 delete mode 100644 jstests/noPassthrough/queryStats/query_stats_feature_flag.js
 delete mode 100644 jstests/noPassthrough/queryStats/query_stats_key.js
 delete mode 100644 jstests/noPassthrough/queryStats/query_stats_metrics_across_getMore_calls.js
 delete mode 100644 jstests/noPassthrough/queryStats/query_stats_redact_find_cmd.js
 delete mode 100644 jstests/noPassthrough/queryStats/query_stats_sampling_rate.js
 delete mode 100644 jstests/noPassthrough/queryStats/query_stats_server_status_metrics.js
 delete mode 100644 jstests/noPassthrough/queryStats/query_stats_upgrade.js
 delete mode 100644 jstests/noPassthrough/queryStats/redact_queries_with_nonobject_fields.js
 create mode 100644 jstests/noPassthrough/telemetry/application_name_find.js
 create mode 100644 jstests/noPassthrough/telemetry/clear_telemetry_store.js
 create mode 100644 jstests/noPassthrough/telemetry/documentSourceTelemetry_redaction_parameters.js
 create mode 100644 jstests/noPassthrough/telemetry/feature_flag_off_sampling_rate_on.js
 create mode 100644 jstests/noPassthrough/telemetry/query_stats_key.js
 create mode 100644 jstests/noPassthrough/telemetry/redact_queries_with_nonobject_fields.js
 create mode 100644 jstests/noPassthrough/telemetry/telemetry_collect_on_mongos.js
 create mode 100644 jstests/noPassthrough/telemetry/telemetry_feature_flag.js
 create mode 100644 jstests/noPassthrough/telemetry/telemetry_metrics_across_getMore_calls.js
 create mode 100644 jstests/noPassthrough/telemetry/telemetry_redact_find_cmd.js
 create mode 100644 jstests/noPassthrough/telemetry/telemetry_sampling_rate.js
 create mode 100644 jstests/noPassthrough/telemetry/telemetry_server_status_metrics.js
 create mode 100644 jstests/noPassthrough/telemetry/telemetry_upgrade.js
 delete mode 100644 src/mongo/db/pipeline/document_source_query_stats.cpp
 delete mode 100644 src/mongo/db/pipeline/document_source_query_stats.h
 delete mode 100644 src/mongo/db/pipeline/document_source_query_stats_test.cpp
 create mode 100644 src/mongo/db/pipeline/document_source_telemetry.cpp
 create mode 100644 src/mongo/db/pipeline/document_source_telemetry.h
 create mode 100644 src/mongo/db/pipeline/document_source_telemetry_test.cpp
 delete mode 100644 src/mongo/db/query/query_stats.cpp
 delete mode 100644 src/mongo/db/query/query_stats.h
 delete mode 100644 src/mongo/db/query/query_stats_store_test.cpp
 delete mode 100644 src/mongo/db/query/query_stats_util.cpp
 delete mode 100644 src/mongo/db/query/query_stats_util.h
 create mode 100644 src/mongo/db/query/telemetry.cpp
 create mode 100644 src/mongo/db/query/telemetry.h
 create mode 100644 src/mongo/db/query/telemetry_store_test.cpp
 create mode 100644 src/mongo/db/query/telemetry_util.cpp
 create mode 100644 src/mongo/db/query/telemetry_util.h

diff --git a/buildscripts/resmokeconfig/suites/telemetry_passthrough.yml b/buildscripts/resmokeconfig/suites/telemetry_passthrough.yml
index 08fa435a07d..1aa2a490a5f 100644
--- a/buildscripts/resmokeconfig/suites/telemetry_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/telemetry_passthrough.yml
@@ -27,4 +27,4 @@ executor:
   mongod_options:
     set_parameters:
       enableTestCommands: 1
-      internalQueryStatsSamplingRate: -1
+      internalQueryConfigureTelemetrySamplingRate: -1
diff --git a/jstests/auth/lib/commands_lib.js b/jstests/auth/lib/commands_lib.js
index 1d60c7aa308..170223762b4 100644
--- a/jstests/auth/lib/commands_lib.js
+++ b/jstests/auth/lib/commands_lib.js
@@ -6629,12 +6629,12 @@ export const authCommandsLib = {
           ]
         },
         {
-          // Test that only clusterManager has permission to run $queryStats
+          // Test that only clusterManager has permission to run $telemetry
          testname: "testTelemetryReadPrivilege",
-          command: {aggregate: 1, pipeline: [{$queryStats: {}}], cursor: {}},
+          command: {aggregate: 1, pipeline: [{$telemetry: {}}], cursor: {}},
          skipSharded: false,
          skipTest: (conn) => {
-            return !TestData.setParameters.featureFlagQueryStats;
+            return !TestData.setParameters.featureFlagTelemetry;
          },
          testcases: [{runOnDb: adminDbName, roles: roles_clusterManager}]
        },
diff --git a/jstests/libs/telemetry_utils.js b/jstests/libs/telemetry_utils.js
index 0bb9e90fb58..11e2d236827 100644
--- a/jstests/libs/telemetry_utils.js
+++ b/jstests/libs/telemetry_utils.js
@@ -45,7 +45,7 @@ function getTelemetry(conn) {
     const result = conn.adminCommand({
         aggregate: 1,
         pipeline: [
-            {$queryStats: {}},
+            {$telemetry: {}},
             // Sort on telemetry key so entries are in a deterministic order.
             {$sort: {key: 1}},
             {$match: {"key.applicationName": kApplicationName}}
@@ -62,7 +62,7 @@ function getTelemetryRedacted(
     hmacKey = BinData(0, "MjM0NTY3ODkxMDExMTIxMzE0MTUxNjE3MTgxOTIwMjE=")) {
     // Hashed application name is generated using the default hmacKey argument.
     const kApplicationName = "MongoDB Shell";
-    // Filter out agg queries, including $queryStats.
+    // Filter out agg queries, including $telemetry.
     const match = {
         $match: {"key.queryShape.command": "find", "key.applicationName": kApplicationName}
     };
@@ -70,7 +70,7 @@
     const result = conn.adminCommand({
         aggregate: 1,
         pipeline: [
-            {$queryStats: {applyHmacToIdentifiers: applyHmacToIdentifiers, hmacKey: hmacKey}},
+            {$telemetry: {applyHmacToIdentifiers: applyHmacToIdentifiers, hmacKey: hmacKey}},
             match,
             // Sort on telemetry key so entries are in a deterministic order.
             {$sort: {key: 1}},
diff --git a/jstests/noPassthrough/queryStats/application_name_find.js b/jstests/noPassthrough/queryStats/application_name_find.js
deleted file mode 100644
index 36245a31514..00000000000
--- a/jstests/noPassthrough/queryStats/application_name_find.js
+++ /dev/null
@@ -1,39 +0,0 @@
-/**
- * Test that applicationName and namespace appear in telemetry for the find command.
- * @tags: [featureFlagQueryStats]
- */
-load("jstests/libs/telemetry_utils.js");
-(function() {
-"use strict";
-
-const kApplicationName = "MongoDB Shell";
-const kHashedCollName = "w6Ax20mVkbJu4wQWAMjL8Sl+DfXAr2Zqdc3kJRB7Oo0=";
-const kHashedFieldName = "lU7Z0mLRPRUL+RfAD5jhYPRRpXBsZBxS/20EzDwfOG4=";
-
-// Turn on the collecting of telemetry metrics.
-let options = {
-    setParameter: {internalQueryStatsSamplingRate: -1},
-};
-
-const conn = MongoRunner.runMongod(options);
-conn.setLogLevel(3, "query");
-const testDB = conn.getDB('test');
-var coll = testDB[jsTestName()];
-coll.drop();
-
-coll.insert({v: 1});
-coll.insert({v: 2});
-coll.insert({v: 3});
-
-coll.find({v: 1}).toArray();
-
-let telemetry = getTelemetry(conn);
-assert.eq(1, telemetry.length, telemetry);
-assert.eq(kApplicationName, telemetry[0].key.applicationName, telemetry);
-
-telemetry = getTelemetryRedacted(conn, true);
-assert.eq(1, telemetry.length, telemetry);
-assert.eq(kApplicationName, telemetry[0].key.applicationName, telemetry);
-
-MongoRunner.stopMongod(conn);
-}());
diff --git a/jstests/noPassthrough/queryStats/clear_query_stats_store.js b/jstests/noPassthrough/queryStats/clear_query_stats_store.js
deleted file mode 100644
index 056c565ec02..00000000000
--- a/jstests/noPassthrough/queryStats/clear_query_stats_store.js
+++ /dev/null
@@ -1,45 +0,0 @@
-/**
- * Test that the telemetry store can be cleared when the cache size is reset to 0.
- * @tags: [featureFlagQueryStats]
- */
-load("jstests/libs/telemetry_utils.js");  // For verifyMetrics.
-
-(function() {
-"use strict";
-
-// Turn on the collecting of telemetry metrics.
-let options = {
-    setParameter:
-        {internalQueryStatsSamplingRate: -1, internalQueryConfigureQueryStatsCacheSize: "10MB"},
-};
-
-const conn = MongoRunner.runMongod(options);
-const testDB = conn.getDB('test');
-var coll = testDB[jsTestName()];
-coll.drop();
-
-let query = {};
-for (var j = 0; j < 10; ++j) {
-    query["foo.field.xyz." + j] = 1;
-    query["bar.field.xyz." + j] = 2;
-    query["baz.field.xyz." + j] = 3;
-    coll.aggregate([{$match: query}]).itcount();
-}
-
-// Confirm number of entries in the store and that none have been evicted.
-let telemetryResults = testDB.getSiblingDB("admin").aggregate([{$queryStats: {}}]).toArray();
-assert.eq(telemetryResults.length, 10, telemetryResults);
-assert.eq(testDB.serverStatus().metrics.queryStats.numEvicted, 0);
-
-// Command to clear the cache.
-assert.commandWorked(
-    testDB.adminCommand({setParameter: 1, internalQueryConfigureQueryStatsCacheSize: "0MB"}));
-
-// 10 regular queries plus the $queryStats query, means 11 entries evicted when the cache is
-// cleared.
-assert.eq(testDB.serverStatus().metrics.queryStats.numEvicted, 11);
-
-// Calling $queryStats should fail when the telemetry store size is 0 bytes.
-assert.throwsWithCode(() => testDB.getSiblingDB("admin").aggregate([{$queryStats: {}}]), 6579000);
-MongoRunner.stopMongod(conn);
-}());
diff --git a/jstests/noPassthrough/queryStats/documentSourceQueryStats_redaction_parameters.js b/jstests/noPassthrough/queryStats/documentSourceQueryStats_redaction_parameters.js
deleted file mode 100644
index 8facb106072..00000000000
--- a/jstests/noPassthrough/queryStats/documentSourceQueryStats_redaction_parameters.js
+++ /dev/null
@@ -1,100 +0,0 @@
-/**
- * Test the $queryStats hmac properties.
- * @tags: [featureFlagQueryStats]
- */
-
-load("jstests/aggregation/extras/utils.js");  // For assertAdminDBErrCodeAndErrMsgContains.
-load("jstests/libs/telemetry_utils.js"); - -(function() { -"use strict"; - -// Assert the expected telemetry key with no hmac. -function assertTelemetryKeyWithoutHmac(telemetryKey) { - assert.eq(telemetryKey.filter, {"foo": {"$lte": "?number"}}); - assert.eq(telemetryKey.sort, {"bar": -1}); - assert.eq(telemetryKey.limit, "?number"); -} - -function runTest(conn) { - const testDB = conn.getDB('test'); - var coll = testDB[jsTestName()]; - coll.drop(); - - coll.insert({foo: 1}); - coll.find({foo: {$lte: 2}}).sort({bar: -1}).limit(2).toArray(); - // Default is no hmac. - assertTelemetryKeyWithoutHmac(getTelemetry(conn)[0].key.queryShape); - - // Turning on hmac should apply hmac to all field names on all entries, even previously cached - // ones. - const telemetryKey = getTelemetryRedacted(conn)[0]["key"]; - assert.eq(telemetryKey.queryShape.filter, - {"fNWkKfogMv6MJ77LpBcuPrO7Nq+R+7TqtD+Lgu3Umc4=": {"$lte": "?number"}}); - assert.eq(telemetryKey.queryShape.sort, {"CDDQIXZmDehLKmQcRxtdOQjMqoNqfI2nGt2r4CgJ52o=": -1}); - assert.eq(telemetryKey.queryShape.limit, "?number"); - - // Turning hmac back off should preserve field names on all entries, even previously cached - // ones. - assertTelemetryKeyWithoutHmac(getTelemetry(conn)[0]["key"].queryShape); - - // Explicitly set applyHmacToIdentifiers to false. - assertTelemetryKeyWithoutHmac(getTelemetryRedacted(conn, false)[0]["key"].queryShape); - - // Wrong parameter name throws error. - let pipeline = [{$queryStats: {redactFields: true}}]; - assertAdminDBErrCodeAndErrMsgContains( - coll, - pipeline, - ErrorCodes.FailedToParse, - "$queryStats parameters object may only contain 'applyHmacToIdentifiers' or 'hmacKey' options. Found: redactFields"); - - // Wrong parameter type throws error. - pipeline = [{$queryStats: {applyHmacToIdentifiers: 1}}]; - assertAdminDBErrCodeAndErrMsgContains( - coll, - pipeline, - ErrorCodes.FailedToParse, - "$queryStats applyHmacToIdentifiers parameter must be boolean. Found type: double"); - - pipeline = [{$queryStats: {hmacKey: 1}}]; - assertAdminDBErrCodeAndErrMsgContains( - coll, - pipeline, - ErrorCodes.FailedToParse, - "$queryStats hmacKey parameter must be bindata of length 32 or greater. Found type: double"); - - // Parameter object with unrecognized key throws error. - pipeline = [{$queryStats: {applyHmacToIdentifiers: true, hmacStrategy: "on"}}]; - assertAdminDBErrCodeAndErrMsgContains( - coll, - pipeline, - ErrorCodes.FailedToParse, - "$queryStats parameters object may only contain 'applyHmacToIdentifiers' or 'hmacKey' options. Found: hmacStrategy"); -} - -const conn = MongoRunner.runMongod({ - setParameter: { - internalQueryStatsSamplingRate: -1, - featureFlagQueryStats: true, - } -}); -runTest(conn); -MongoRunner.stopMongod(conn); - -const st = new ShardingTest({ - mongos: 1, - shards: 1, - config: 1, - rs: {nodes: 1}, - mongosOptions: { - setParameter: { - internalQueryStatsSamplingRate: -1, - featureFlagQueryStats: true, - 'failpoint.skipClusterParameterRefresh': "{'mode':'alwaysOn'}" - } - }, -}); -runTest(st.s); -st.stop(); -}()); diff --git a/jstests/noPassthrough/queryStats/feature_flag_off_sampling_rate_on.js b/jstests/noPassthrough/queryStats/feature_flag_off_sampling_rate_on.js deleted file mode 100644 index 38474b944d0..00000000000 --- a/jstests/noPassthrough/queryStats/feature_flag_off_sampling_rate_on.js +++ /dev/null @@ -1,54 +0,0 @@ -/** - * Test that calls to read from telemetry store fail when feature flag is turned off and sampling - * rate > 0. 
- */
-load('jstests/libs/analyze_plan.js');
-load("jstests/libs/feature_flag_util.js");
-
-(function() {
-"use strict";
-
-// Set sampling rate to -1.
-let options = {
-    setParameter: {internalQueryStatsSamplingRate: -1},
-};
-const conn = MongoRunner.runMongod(options);
-const testdb = conn.getDB('test');
-
-// This test specifically tests error handling when the feature flag is not on.
-// TODO SERVER-65800 This test can be deleted when the feature is on by default.
-if (!conn || FeatureFlagUtil.isEnabled(testdb, "QueryStats")) {
-    jsTestLog(`Skipping test since feature flag is disabled. conn: ${conn}`);
-    if (conn) {
-        MongoRunner.stopMongod(conn);
-    }
-    return;
-}
-
-var coll = testdb[jsTestName()];
-coll.drop();
-
-// Bulk insert documents to reduces roundtrips and make timeout on a slow machine less likely.
-const bulk = coll.initializeUnorderedBulkOp();
-for (let i = 1; i <= 20; i++) {
-    bulk.insert({foo: 0, bar: Math.floor(Math.random() * 3)});
-}
-assert.commandWorked(bulk.execute());
-
-// Pipeline to read telemetry store should fail without feature flag turned on even though sampling
-// rate is > 0.
-assert.commandFailedWithCode(
-    testdb.adminCommand({aggregate: 1, pipeline: [{$queryStats: {}}], cursor: {}}),
-    ErrorCodes.QueryFeatureNotAllowed);
-
-// Pipeline, with a filter, to read telemetry store fails without feature flag turned on even though
-// sampling rate is > 0.
-assert.commandFailedWithCode(testdb.adminCommand({
-    aggregate: 1,
-    pipeline: [{$queryStats: {}}, {$match: {"key.queryShape.find": {$eq: "###"}}}],
-    cursor: {}
-}),
-                             ErrorCodes.QueryFeatureNotAllowed);
-
-MongoRunner.stopMongod(conn);
-}());
diff --git a/jstests/noPassthrough/queryStats/query_stats_collect_on_mongos.js b/jstests/noPassthrough/queryStats/query_stats_collect_on_mongos.js
deleted file mode 100644
index 97057269527..00000000000
--- a/jstests/noPassthrough/queryStats/query_stats_collect_on_mongos.js
+++ /dev/null
@@ -1,277 +0,0 @@
-/**
- * Test that mongos is collecting telemetry metrics.
- * @tags: [featureFlagQueryStats]
- */
-
-load('jstests/libs/telemetry_utils.js');
-
-(function() {
-"use strict";
-
-// Redacted literal replacement string. This may change in the future, so it's factored out.
-const aggRedactString = "###";
-const setup = () => {
-    const st = new ShardingTest({
-        mongos: 1,
-        shards: 1,
-        config: 1,
-        rs: {nodes: 1},
-        mongosOptions: {
-            setParameter: {
-                internalQueryStatsSamplingRate: -1,
-                'failpoint.skipClusterParameterRefresh': "{'mode':'alwaysOn'}"
-            }
-        },
-    });
-    const mongos = st.s;
-    const db = mongos.getDB("test");
-    const coll = db.coll;
-    coll.insert({v: 1});
-    coll.insert({v: 4});
-    return st;
-};
-
-const assertExpectedResults = (results,
-                               expectedTelemetryKey,
-                               expectedExecCount,
-                               expectedDocsReturnedSum,
-                               expectedDocsReturnedMax,
-                               expectedDocsReturnedMin,
-                               expectedDocsReturnedSumOfSq) => {
-    const {key, metrics} = results;
-    assert.eq(expectedTelemetryKey, key);
-    assert.eq(expectedExecCount, metrics.execCount);
-    assert.docEq({
-        sum: NumberLong(expectedDocsReturnedSum),
-        max: NumberLong(expectedDocsReturnedMax),
-        min: NumberLong(expectedDocsReturnedMin),
-        sumOfSquares: NumberLong(expectedDocsReturnedSumOfSq)
-    },
-                 metrics.docsReturned);
-
-    // This test can't predict exact timings, so just assert these three fields have been set (are
-    // non-zero).
-    const {firstSeenTimestamp, lastExecutionMicros, queryExecMicros} = metrics;
-
-    assert.neq(timestampCmp(firstSeenTimestamp, Timestamp(0, 0)), 0);
-    assert.neq(lastExecutionMicros, NumberLong(0));
-
-    const distributionFields = ['sum', 'max', 'min', 'sumOfSquares'];
-    for (const field of distributionFields) {
-        assert.neq(queryExecMicros[field], NumberLong(0));
-    }
-};
-
-// Assert that, for find queries, no telemetry results are written until a cursor has reached
-// exhaustion; ensure accurate results once they're written.
-{
-    const st = setup();
-    const db = st.s.getDB("test");
-    const collName = "coll";
-    const coll = db[collName];
-
-    const telemetryKey = {
-        queryShape: {
-            cmdNs: {db: "test", coll: "coll"},
-            command: "find",
-            filter: {$and: [{v: {$gt: "?number"}}, {v: {$lt: "?number"}}]},
-        },
-        readConcern: {level: "local", provenance: "implicitDefault"},
-        batchSize: "?number",
-        applicationName: "MongoDB Shell",
-    };
-
-    const cursor = coll.find({v: {$gt: 0, $lt: 5}}).batchSize(1);  // returns 1 doc
-
-    // Since the cursor hasn't been exhausted yet, ensure no telemetry results have been written
-    // yet.
-    let telemetry = getTelemetry(db);
-    assert.eq(0, telemetry.length, telemetry);
-
-    // Run a getMore to exhaust the cursor, then ensure telemetry results have been written
-    // accurately. batchSize must be 2 so the cursor recognizes exhaustion.
-    assert.commandWorked(db.runCommand({
-        getMore: cursor.getId(),
-        collection: coll.getName(),
-        batchSize: 2
-    }));  // returns 1 doc, exhausts the cursor
-    // The $queryStats query for the previous `getTelemetry` is included in this call to
-    // $queryStats.
-    telemetry = getTelemetry(db);
-    assert.eq(2, telemetry.length, telemetry);
-    assertExpectedResults(telemetry[0],
-                          telemetryKey,
-                          /* expectedExecCount */ 1,
-                          /* expectedDocsReturnedSum */ 2,
-                          /* expectedDocsReturnedMax */ 2,
-                          /* expectedDocsReturnedMin */ 2,
-                          /* expectedDocsReturnedSumOfSq */ 4);
-
-    // Run more queries (to exhaustion) with the same query shape, and ensure telemetry results are
-    // accurate.
-    coll.find({v: {$gt: 2, $lt: 3}}).batchSize(10).toArray();  // returns 0 docs
-    coll.find({v: {$gt: 0, $lt: 1}}).batchSize(10).toArray();  // returns 0 docs
-    coll.find({v: {$gt: 0, $lt: 2}}).batchSize(10).toArray();  // return 1 doc
-    telemetry = getTelemetry(db);
-    assert.eq(2, telemetry.length, telemetry);
-    assertExpectedResults(telemetry[0],
-                          telemetryKey,
-                          /* expectedExecCount */ 4,
-                          /* expectedDocsReturnedSum */ 3,
-                          /* expectedDocsReturnedMax */ 2,
-                          /* expectedDocsReturnedMin */ 0,
-                          /* expectedDocsReturnedSumOfSq */ 5);
-
-    st.stop();
-}
-
-// Assert that, for agg queries, no telemetry results are written until a cursor has reached
-// exhaustion; ensure accurate results once they're written.
-{
-    const st = setup();
-    const db = st.s.getDB("test");
-    const coll = db.coll;
-
-    const telemetryKey = {
-        pipeline: [
-            {$match: {v: {$gt: aggRedactString, $lt: aggRedactString}}},
-            {$project: {hello: aggRedactString}},
-        ],
-        namespace: "test.coll",
-        applicationName: "MongoDB Shell"
-    };
-
-    const cursor = coll.aggregate(
-        [
-            {$match: {v: {$gt: 0, $lt: 5}}},
-            {$project: {hello: "$world"}},
-        ],
-        {cursor: {batchSize: 1}});  // returns 1 doc
-
-    // Since the cursor hasn't been exhausted yet, ensure no telemetry results have been written
-    // yet.
-    let telemetry = getTelemetry(db);
-    assert.eq(0, telemetry.length, telemetry);
-
-    // Run a getMore to exhaust the cursor, then ensure telemetry results have been written
-    // accurately. batchSize must be 2 so the cursor recognizes exhaustion.
-    assert.commandWorked(db.runCommand({
-        getMore: cursor.getId(),
-        collection: coll.getName(),
-        batchSize: 2
-    }));  // returns 1 doc, exhausts the cursor
-    // The $queryStats query for the previous `getTelemetry` is included in this call to
-    // $queryStats.
-    telemetry = getTelemetry(db);
-    assert.eq(2, telemetry.length, telemetry);
-    assertExpectedResults(telemetry[0],
-                          telemetryKey,
-                          /* expectedExecCount */ 1,
-                          /* expectedDocsReturnedSum */ 2,
-                          /* expectedDocsReturnedMax */ 2,
-                          /* expectedDocsReturnedMin */ 2,
-                          /* expectedDocsReturnedSumOfSq */ 4);
-
-    // Run more queries (to exhaustion) with the same query shape, and ensure telemetry results are
-    // accurate.
-    coll.aggregate([
-        {$match: {v: {$gt: 0, $lt: 5}}},
-        {$project: {hello: "$world"}},
-    ]);  // returns 2 docs
-    coll.aggregate([
-        {$match: {v: {$gt: 2, $lt: 3}}},
-        {$project: {hello: "$universe"}},
-    ]);  // returns 0 docs
-    coll.aggregate([
-        {$match: {v: {$gt: 0, $lt: 2}}},
-        {$project: {hello: "$galaxy"}},
-    ]);  // returns 1 doc
-    telemetry = getTelemetry(db);
-    assert.eq(2, telemetry.length, telemetry);
-    assertExpectedResults(telemetry[0],
-                          telemetryKey,
-                          /* expectedExecCount */ 4,
-                          /* expectedDocsReturnedSum */ 5,
-                          /* expectedDocsReturnedMax */ 2,
-                          /* expectedDocsReturnedMin */ 0,
-                          /* expectedDocsReturnedSumOfSq */ 9);
-
-    st.stop();
-}
-
-// Assert on batchSize-limited find queries that killCursors will write metrics with partial results
-// to the telemetry store.
-{
-    const st = setup();
-    const db = st.s.getDB("test");
-    const collName = "coll";
-    const coll = db[collName];
-
-    const telemetryKey = {
-        queryShape: {
-            cmdNs: {db: "test", coll: "coll"},
-            command: "find",
-            filter: {$and: [{v: {$gt: "?number"}}, {v: {$lt: "?number"}}]},
-        },
-        readConcern: {level: "local", provenance: "implicitDefault"},
-        batchSize: "?number",
-        applicationName: "MongoDB Shell"
-    };
-
-    const cursor1 = coll.find({v: {$gt: 0, $lt: 5}}).batchSize(1);  // returns 1 doc
-    const cursor2 = coll.find({v: {$gt: 0, $lt: 2}}).batchSize(1);  // returns 1 doc
-
-    assert.commandWorked(
-        db.runCommand({killCursors: coll.getName(), cursors: [cursor1.getId(), cursor2.getId()]}));
-
-    const telemetry = getTelemetry(db);
-    assert.eq(1, telemetry.length);
-    assertExpectedResults(telemetry[0],
-                          telemetryKey,
-                          /* expectedExecCount */ 2,
-                          /* expectedDocsReturnedSum */ 2,
-                          /* expectedDocsReturnedMax */ 1,
-                          /* expectedDocsReturnedMin */ 1,
-                          /* expectedDocsReturnedSumOfSq */ 2);
-    st.stop();
-}
-
-// Assert on batchSize-limited agg queries that killCursors will write metrics with partial results
-// to the telemetry store.
-{
-    const st = setup();
-    const db = st.s.getDB("test");
-    const coll = db.coll;
-
-    const telemetryKey = {
-        pipeline: [{$match: {v: {$gt: aggRedactString, $lt: aggRedactString}}}],
-        namespace: `test.${coll.getName()}`,
-        applicationName: "MongoDB Shell"
-    };
-
-    const cursor1 = coll.aggregate(
-        [
-            {$match: {v: {$gt: 0, $lt: 5}}},
-        ],
-        {cursor: {batchSize: 1}});  // returns 1 doc
-    const cursor2 = coll.aggregate(
-        [
-            {$match: {v: {$gt: 0, $lt: 2}}},
-        ],
-        {cursor: {batchSize: 1}});  // returns 1 doc
-
-    assert.commandWorked(
-        db.runCommand({killCursors: coll.getName(), cursors: [cursor1.getId(), cursor2.getId()]}));
-
-    const telemetry = getTelemetry(db);
-    assert.eq(1, telemetry.length);
-    assertExpectedResults(telemetry[0],
-                          telemetryKey,
-                          /* expectedExecCount */ 2,
-                          /* expectedDocsReturnedSum */ 2,
-                          /* expectedDocsReturnedMax */ 1,
-                          /* expectedDocsReturnedMin */ 1,
-                          /* expectedDocsReturnedSumOfSq */ 2);
-    st.stop();
-}
-}());
diff --git a/jstests/noPassthrough/queryStats/query_stats_feature_flag.js b/jstests/noPassthrough/queryStats/query_stats_feature_flag.js
deleted file mode 100644
index bcce489d8da..00000000000
--- a/jstests/noPassthrough/queryStats/query_stats_feature_flag.js
+++ /dev/null
@@ -1,34 +0,0 @@
-/**
- * Test that calls to read from telemetry store fail when feature flag is turned off.
- */
-load('jstests/libs/analyze_plan.js');
-load("jstests/libs/feature_flag_util.js");
-
-(function() {
-"use strict";
-
-// This test specifically tests error handling when the feature flag is not on.
-// TODO SERVER-65800 this test can be removed when the feature flag is removed.
-const conn = MongoRunner.runMongod();
-const testDB = conn.getDB('test');
-if (FeatureFlagUtil.isEnabled(testDB, "QueryStats")) {
-    jsTestLog("Skipping test since query stats are enabled.");
-    MongoRunner.stopMongod(conn);
-    return;
-}
-
-// Pipeline to read telemetry store should fail without feature flag turned on.
-assert.commandFailedWithCode(
-    testDB.adminCommand({aggregate: 1, pipeline: [{$queryStats: {}}], cursor: {}}),
-    ErrorCodes.QueryFeatureNotAllowed);
-
-// Pipeline, with a filter, to read telemetry store fails without feature flag turned on.
-assert.commandFailedWithCode(testDB.adminCommand({
-    aggregate: 1,
-    pipeline: [{$queryStats: {}}, {$match: {"key.queryShape.find": {$eq: "###"}}}],
-    cursor: {}
-}),
-                             ErrorCodes.QueryFeatureNotAllowed);
-
-MongoRunner.stopMongod(conn);
-}());
diff --git a/jstests/noPassthrough/queryStats/query_stats_key.js b/jstests/noPassthrough/queryStats/query_stats_key.js
deleted file mode 100644
index 8b63417078a..00000000000
--- a/jstests/noPassthrough/queryStats/query_stats_key.js
+++ /dev/null
@@ -1,111 +0,0 @@
-/**
- * This test confirms that telemetry store key fields are properly nested and none are missing.
- * @tags: [featureFlagTelemetry]
- */
-load("jstests/libs/telemetry_utils.js");
-(function() {
-"use strict";
-
-function confirmAllFieldsPresent(queryStatsEntries) {
-    const kApplicationName = "MongoDB Shell";
-    const queryShapeFindFields = [
-        "cmdNs",
-        "command",
-        "filter",
-        "sort",
-        "projection",
-        "hint",
-        "skip",
-        "limit",
-        "singleBatch",
-        "max",
-        "min",
-        "returnKey",
-        "showRecordId",
-        "tailable",
-        "oplogReplay",
-        "awaitData",
-        "collation",
-        "allowDiskUse",
-        "let"
-    ];
-
-    // The outer fields not nested inside queryShape.
-    const queryStatsKeyFields = [
-        "queryShape",
-        "batchSize",
-        "comment",
-        "maxTimeMS",
-        "noCursorTimeout",
-        "readConcern",
-        "allowPartialResults",
-        "applicationName"
-    ];
-
-    for (const entry of queryStatsEntries) {
-        let fieldCounter = 0;
-        assert.eq(entry.key.queryShape.command, "find");
-        assert.eq(entry.key.applicationName, kApplicationName);
-
-        for (const field in entry.key.queryShape) {
-            assert(queryShapeFindFields.includes(field));
-            fieldCounter++;
-        }
-        assert.eq(fieldCounter, queryShapeFindFields.length);
-
-        fieldCounter = 0;
-        for (const field in entry.key) {
-            assert(queryStatsKeyFields.includes(field));
-            fieldCounter++;
-        }
-        assert.eq(fieldCounter, queryStatsKeyFields.length);
-    }
-}
-
-// Turn on the collecting of telemetry metrics.
-let options = {
-    setParameter: {internalQueryStatsSamplingRate: -1},
-};
-
-const conn = MongoRunner.runMongod(options);
-const testDB = conn.getDB('test');
-var coll = testDB[jsTestName()];
-coll.drop();
-
-// Have to create an index for hint not to fail.
-assert.commandWorked(coll.createIndex({v: 1}));
-
-let commandObj = {
-    find: coll.getName(),
-    filter: {v: {$eq: 2}},
-    oplogReplay: true,
-    comment: "this is a test!!",
-    min: {"v": 0},
-    max: {"v": 4},
-    hint: {"v": 1},
-    sort: {a: -1},
-    returnKey: false,
-    noCursorTimeout: true,
-    showRecordId: false,
-    tailable: false,
-    awaitData: false,
-    allowPartialResults: true,
-    skip: 1,
-    limit: 2,
-    maxTimeMS: 500,
-    collation: {locale: "en_US", strength: 2},
-    allowDiskUse: true,
-    readConcern: {level: "local"},
-    batchSize: 2,
-    singleBatch: true,
-    let : {},
-    projection: {_id: 0},
-};
-
-assert.commandWorked(testDB.runCommand(commandObj));
-let telemetry = getTelemetry(conn);
-assert.eq(1, telemetry.length);
-confirmAllFieldsPresent(telemetry);
-
-MongoRunner.stopMongod(conn);
-}());
diff --git a/jstests/noPassthrough/queryStats/query_stats_metrics_across_getMore_calls.js b/jstests/noPassthrough/queryStats/query_stats_metrics_across_getMore_calls.js
deleted file mode 100644
index d5caea74cf7..00000000000
--- a/jstests/noPassthrough/queryStats/query_stats_metrics_across_getMore_calls.js
+++ /dev/null
@@ -1,159 +0,0 @@
-/**
- * Test that the telemetry metrics are aggregated properly by distinct query shape over getMore
- * calls.
- * @tags: [featureFlagQueryStats]
- */
-load("jstests/libs/telemetry_utils.js");  // For verifyMetrics.
-
-(function() {
-"use strict";
-
-// Turn on the collecting of telemetry metrics.
-let options = {
-    setParameter: {internalQueryStatsSamplingRate: -1},
-};
-
-const conn = MongoRunner.runMongod(options);
-const testDB = conn.getDB('test');
-var coll = testDB[jsTestName()];
-coll.drop();
-
-// Bulk insert documents to reduces roundtrips and make timeout on a slow machine less likely.
-const bulk = coll.initializeUnorderedBulkOp();
-const numDocs = 100;
-for (let i = 0; i < numDocs / 2; ++i) {
-    bulk.insert({foo: 0, bar: Math.floor(Math.random() * 3)});
-    bulk.insert({foo: 1, bar: Math.floor(Math.random() * -2)});
-}
-assert.commandWorked(bulk.execute());
-
-// Assert that two queries with identical structures are represented by the same key.
-{
-    // Note: toArray() is necessary for the batchSize-limited query to run to cursor exhaustion
-    // (when it writes to the telemetry store).
-    coll.aggregate([{$match: {foo: 1}}], {cursor: {batchSize: 2}}).toArray();
-    coll.aggregate([{$match: {foo: 0}}], {cursor: {batchSize: 2}}).toArray();
-
-    // This command will return all telemetry store entires.
- const telemetryResults = testDB.getSiblingDB("admin").aggregate([{$queryStats: {}}]).toArray(); - // Assert there is only one entry. - assert.eq(telemetryResults.length, 1, telemetryResults); - const telemetryEntry = telemetryResults[0]; - assert.eq(telemetryEntry.key.namespace, `test.${jsTestName()}`); - assert.eq(telemetryEntry.key.applicationName, "MongoDB Shell"); - - // Assert we update execution count for identically shaped queries. - assert.eq(telemetryEntry.metrics.execCount, 2); - - // Assert telemetry values are accurate for the two above queries. - assert.eq(telemetryEntry.metrics.docsReturned.sum, numDocs); - assert.eq(telemetryEntry.metrics.docsReturned.min, numDocs / 2); - assert.eq(telemetryEntry.metrics.docsReturned.max, numDocs / 2); - - verifyMetrics(telemetryResults); -} - -const fooEqBatchSize = 5; -const fooNeBatchSize = 3; -// Assert on batchSize-limited queries that killCursors will write metrics with partial results to -// the telemetry store. -{ - let cursor1 = coll.find({foo: {$eq: 0}}).batchSize(fooEqBatchSize); - let cursor2 = coll.find({foo: {$ne: 0}}).batchSize(fooNeBatchSize); - // Issue one getMore for the first query, so 2 * fooEqBatchSize documents are returned total. - assert.commandWorked(testDB.runCommand( - {getMore: cursor1.getId(), collection: coll.getName(), batchSize: fooEqBatchSize})); - - // Kill both cursors so the telemetry metrics are stored. - assert.commandWorked(testDB.runCommand( - {killCursors: coll.getName(), cursors: [cursor1.getId(), cursor2.getId()]})); - - // This filters telemetry entires to just the ones entered when running above find queries. - const telemetryResults = testDB.getSiblingDB("admin") - .aggregate([ - {$queryStats: {}}, - {$match: {"key.queryShape.filter.foo": {$exists: true}}}, - {$sort: {key: 1}}, - ]) - .toArray(); - assert.eq(telemetryResults.length, 2, telemetryResults); - assert.eq(telemetryResults[0].key.queryShape.cmdNs.db, "test"); - assert.eq(telemetryResults[0].key.queryShape.cmdNs.coll, jsTestName()); - assert.eq(telemetryResults[0].key.applicationName, "MongoDB Shell"); - assert.eq(telemetryResults[1].key.queryShape.cmdNs.db, "test"); - assert.eq(telemetryResults[1].key.queryShape.cmdNs.coll, jsTestName()); - assert.eq(telemetryResults[1].key.applicationName, "MongoDB Shell"); - - assert.eq(telemetryResults[0].metrics.execCount, 1); - assert.eq(telemetryResults[1].metrics.execCount, 1); - assert.eq(telemetryResults[0].metrics.docsReturned.sum, fooEqBatchSize * 2); - assert.eq(telemetryResults[1].metrics.docsReturned.sum, fooNeBatchSize); - - verifyMetrics(telemetryResults); -} - -// Assert that options such as limit/sort create different keys, and that repeating a query shape -// ({foo: {$eq}}) aggregates metrics across executions. -{ - const query2Limit = 50; - coll.find({foo: {$eq: 0}}).batchSize(2).toArray(); - coll.find({foo: {$eq: 1}}).limit(query2Limit).batchSize(2).toArray(); - coll.find().sort({"foo": 1}).batchSize(2).toArray(); - // This filters telemetry entires to just the ones entered when running above find queries. - let telemetryResults = - testDB.getSiblingDB("admin") - .aggregate([{$queryStats: {}}, {$match: {"key.queryShape.command": "find"}}]) - .toArray(); - assert.eq(telemetryResults.length, 4, telemetryResults); - - verifyMetrics(telemetryResults); - - // This filters to just the telemetry for query coll.find().sort({"foo": 1}).batchSize(2). 
- telemetryResults = testDB.getSiblingDB("admin") - .aggregate([{$queryStats: {}}, {$match: {"key.queryShape.sort.foo": 1}}]) - .toArray(); - assert.eq(telemetryResults.length, 1, telemetryResults); - assert.eq(telemetryResults[0].key.queryShape.cmdNs.db, "test"); - assert.eq(telemetryResults[0].key.queryShape.cmdNs.coll, jsTestName()); - assert.eq(telemetryResults[0].key.applicationName, "MongoDB Shell"); - assert.eq(telemetryResults[0].metrics.execCount, 1); - assert.eq(telemetryResults[0].metrics.docsReturned.sum, numDocs); - - // This filters to just the telemetry for query coll.find({foo: {$eq: - // 1}}).limit(query2Limit).batchSize(2). - telemetryResults = - testDB.getSiblingDB("admin") - .aggregate([{$queryStats: {}}, {$match: {"key.queryShape.limit": '?number'}}]) - .toArray(); - assert.eq(telemetryResults.length, 1, telemetryResults); - assert.eq(telemetryResults[0].key.queryShape.cmdNs.db, "test"); - assert.eq(telemetryResults[0].key.queryShape.cmdNs.coll, jsTestName()); - assert.eq(telemetryResults[0].key.applicationName, "MongoDB Shell"); - assert.eq(telemetryResults[0].metrics.execCount, 1); - assert.eq(telemetryResults[0].metrics.docsReturned.sum, query2Limit); - - // This filters to just the telemetry for query coll.find({foo: {$eq: 0}}).batchSize(2). - telemetryResults = testDB.getSiblingDB("admin") - .aggregate([ - {$queryStats: {}}, - { - $match: { - "key.queryShape.filter.foo": {$eq: {$eq: "?number"}}, - "key.queryShape.limit": {$exists: false}, - "key.queryShape.sort": {$exists: false} - } - } - ]) - .toArray(); - assert.eq(telemetryResults.length, 1, telemetryResults); - assert.eq(telemetryResults[0].key.queryShape.cmdNs.db, "test"); - assert.eq(telemetryResults[0].key.queryShape.cmdNs.coll, jsTestName()); - assert.eq(telemetryResults[0].key.applicationName, "MongoDB Shell"); - assert.eq(telemetryResults[0].metrics.execCount, 2); - assert.eq(telemetryResults[0].metrics.docsReturned.sum, numDocs / 2 + 2 * fooEqBatchSize); - assert.eq(telemetryResults[0].metrics.docsReturned.max, numDocs / 2); - assert.eq(telemetryResults[0].metrics.docsReturned.min, 2 * fooEqBatchSize); -} - -MongoRunner.stopMongod(conn); -}()); diff --git a/jstests/noPassthrough/queryStats/query_stats_redact_find_cmd.js b/jstests/noPassthrough/queryStats/query_stats_redact_find_cmd.js deleted file mode 100644 index b2cce48cdb7..00000000000 --- a/jstests/noPassthrough/queryStats/query_stats_redact_find_cmd.js +++ /dev/null @@ -1,69 +0,0 @@ -/** - * Test that $queryStats properly applies hmac to find commands, on mongod and mongos. - */ -load("jstests/libs/telemetry_utils.js"); -(function() { -"use strict"; - -const kHashedCollName = "w6Ax20mVkbJu4wQWAMjL8Sl+DfXAr2Zqdc3kJRB7Oo0="; -const kHashedFieldName = "lU7Z0mLRPRUL+RfAD5jhYPRRpXBsZBxS/20EzDwfOG4="; - -function runTest(conn) { - const db = conn.getDB("test"); - const admin = conn.getDB("admin"); - - db.test.drop(); - db.test.insert({v: 1}); - - db.test.find({v: 1}).toArray(); - - let telemetry = getTelemetryRedacted(admin); - - assert.eq(1, telemetry.length); - assert.eq("find", telemetry[0].key.queryShape.command); - assert.eq({[kHashedFieldName]: {$eq: "?number"}}, telemetry[0].key.queryShape.filter); - - db.test.insert({v: 2}); - - const cursor = db.test.find({v: {$gt: 0, $lt: 3}}).batchSize(1); - telemetry = getTelemetryRedacted(admin); - // Cursor isn't exhausted, so there shouldn't be another entry yet. 
-    assert.eq(1, telemetry.length);
-
-    assert.commandWorked(
-        db.runCommand({getMore: cursor.getId(), collection: db.test.getName(), batchSize: 2}));
-
-    telemetry = getTelemetryRedacted(admin);
-    assert.eq(2, telemetry.length);
-    assert.eq("find", telemetry[1].key.queryShape.command);
-    assert.eq({
-        "$and": [{[kHashedFieldName]: {"$gt": "?number"}}, {[kHashedFieldName]: {"$lt": "?number"}}]
-    },
-              telemetry[1].key.queryShape.filter);
-}
-
-const conn = MongoRunner.runMongod({
-    setParameter: {
-        internalQueryStatsSamplingRate: -1,
-        featureFlagQueryStats: true,
-    }
-});
-runTest(conn);
-MongoRunner.stopMongod(conn);
-
-const st = new ShardingTest({
-    mongos: 1,
-    shards: 1,
-    config: 1,
-    rs: {nodes: 1},
-    mongosOptions: {
-        setParameter: {
-            internalQueryStatsSamplingRate: -1,
-            featureFlagQueryStats: true,
-            'failpoint.skipClusterParameterRefresh': "{'mode':'alwaysOn'}"
-        }
-    },
-});
-runTest(st.s);
-st.stop();
-}());
diff --git a/jstests/noPassthrough/queryStats/query_stats_sampling_rate.js b/jstests/noPassthrough/queryStats/query_stats_sampling_rate.js
deleted file mode 100644
index 009c59737fa..00000000000
--- a/jstests/noPassthrough/queryStats/query_stats_sampling_rate.js
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- * Test that calls to read from telemetry store fail when sampling rate is not greater than 0 even
- * if feature flag is on.
- * @tags: [featureFlagQueryStats]
- */
-load('jstests/libs/analyze_plan.js');
-
-(function() {
-"use strict";
-
-let options = {
-    setParameter: {internalQueryStatsSamplingRate: 0},
-};
-
-const conn = MongoRunner.runMongod(options);
-const testdb = conn.getDB('test');
-var coll = testdb[jsTestName()];
-coll.drop();
-for (var i = 0; i < 20; i++) {
-    coll.insert({foo: 0, bar: Math.floor(Math.random() * 3)});
-}
-
-coll.aggregate([{$match: {foo: 1}}], {cursor: {batchSize: 2}});
-
-// Reading telemetry store with a sampling rate of 0 should return 0 documents.
-let telStore = testdb.adminCommand({aggregate: 1, pipeline: [{$queryStats: {}}], cursor: {}});
-assert.eq(telStore.cursor.firstBatch.length, 0);
-
-// Reading telemetry store should work now with a sampling rate of greater than 0.
-assert.commandWorked(
-    testdb.adminCommand({setParameter: 1, internalQueryStatsSamplingRate: 2147483647}));
-coll.aggregate([{$match: {foo: 1}}], {cursor: {batchSize: 2}});
-telStore = assert.commandWorked(
-    testdb.adminCommand({aggregate: 1, pipeline: [{$queryStats: {}}], cursor: {}}));
-assert.eq(telStore.cursor.firstBatch.length, 1);
-
-MongoRunner.stopMongod(conn);
-}());
diff --git a/jstests/noPassthrough/queryStats/query_stats_server_status_metrics.js b/jstests/noPassthrough/queryStats/query_stats_server_status_metrics.js
deleted file mode 100644
index b142d901a7f..00000000000
--- a/jstests/noPassthrough/queryStats/query_stats_server_status_metrics.js
+++ /dev/null
@@ -1,186 +0,0 @@
-/**
- * Test the telemetry related serverStatus metrics.
- * @tags: [featureFlagQueryStats]
- */
-load('jstests/libs/analyze_plan.js');
-
-(function() {
-"use strict";
-
-function runTestWithMongodOptions(mongodOptions, test, testOptions) {
-    const conn = MongoRunner.runMongod(mongodOptions);
-    const testDB = conn.getDB('test');
-    const coll = testDB[jsTestName()];
-
-    test(conn, testDB, coll, testOptions);
-
-    MongoRunner.stopMongod(conn);
-}
-
-/**
- * Test serverStatus metric which counts the number of evicted entries.
- *
- * testOptions must include `resetCacheSize` bool field; e.g., { resetCacheSize : true }
- */
-function evictionTest(conn, testDB, coll, testOptions) {
-    const evictedBefore = testDB.serverStatus().metrics.queryStats.numEvicted;
-    assert.eq(evictedBefore, 0);
-    for (var i = 0; i < 4000; i++) {
-        let query = {};
-        query["foo" + i] = "bar";
-        coll.aggregate([{$match: query}]).itcount();
-    }
-    if (!testOptions.resetCacheSize) {
-        const evictedAfter = testDB.serverStatus().metrics.queryStats.numEvicted;
-        assert.gt(evictedAfter, 0);
-        return;
-    }
-    // Make sure number of evicted entries increases when the cache size is reset, which forces out
-    // least recently used entries to meet the new, smaller size requirement.
-    assert.eq(testDB.serverStatus().metrics.queryStats.numEvicted, 0);
-    assert.commandWorked(
-        testDB.adminCommand({setParameter: 1, internalQueryConfigureQueryStatsCacheSize: "1MB"}));
-    const evictedAfter = testDB.serverStatus().metrics.queryStats.numEvicted;
-    assert.gt(evictedAfter, 0);
-}
-
-/**
- * Test serverStatus metric which counts the number of requests for which telemetry is not collected
- * due to rate-limiting.
- *
- * testOptions must include `samplingRate` and `numRequests` number fields;
- * e.g., { samplingRate: 2147483647, numRequests: 20 }
- */
-function countRateLimitedRequestsTest(conn, testDB, coll, testOptions) {
-    const numRateLimitedRequestsBefore =
-        testDB.serverStatus().metrics.queryStats.numRateLimitedRequests;
-    assert.eq(numRateLimitedRequestsBefore, 0);
-
-    coll.insert({a: 0});
-
-    // Running numRequests / 2 times since we dispatch two requests per iteration
-    for (var i = 0; i < testOptions.numRequests / 2; i++) {
-        coll.find({a: 0}).toArray();
-        coll.aggregate([{$match: {a: 1}}]);
-    }
-
-    const numRateLimitedRequestsAfter =
-        testDB.serverStatus().metrics.queryStats.numRateLimitedRequests;
-
-    if (testOptions.samplingRate === 0) {
-        // Telemetry should not be collected for any requests.
-        assert.eq(numRateLimitedRequestsAfter, testOptions.numRequests);
-    } else if (testOptions.samplingRate >= testOptions.numRequests) {
-        // Telemetry should be collected for all requests.
-        assert.eq(numRateLimitedRequestsAfter, 0);
-    } else {
-        // Telemetry should be collected for some but not all requests.
-        assert.gt(numRateLimitedRequestsAfter, 0);
-        assert.lt(numRateLimitedRequestsAfter, testOptions.numRequests);
-    }
-}
-
-function telemetryStoreSizeEstimateTest(conn, testDB, coll, testOptions) {
-    assert.eq(testDB.serverStatus().metrics.queryStats.queryStatsStoreSizeEstimateBytes, 0);
-    let halfWayPointSize;
-    // Only using three digit numbers (eg 100, 101) means the string length will be the same for all
-    // entries and therefore the key size will be the same for all entries, which makes predicting
-    // the total size of the store clean and easy.
-    for (var i = 100; i < 200; i++) {
-        coll.aggregate([{$match: {["foo" + i]: "bar"}}]).itcount();
-        if (i == 150) {
-            halfWayPointSize =
-                testDB.serverStatus().metrics.queryStats.queryStatsStoreSizeEstimateBytes;
-        }
-    }
-    // Confirm that telemetry store has grown and size is non-zero.
-    assert.gt(halfWayPointSize, 0);
-    const fullSize = testDB.serverStatus().metrics.queryStats.queryStatsStoreSizeEstimateBytes;
-    assert.gt(fullSize, 0);
-    // Make sure the final telemetry store size is twice as much as the halfway point size (+/- 5%)
-    assert(fullSize >= halfWayPointSize * 1.95 && fullSize <= halfWayPointSize * 2.05,
-           tojson({fullSize, halfWayPointSize}));
-}
-
-function telemetryStoreWriteErrorsTest(conn, testDB, coll, testOptions) {
-    const debugBuild = testDB.adminCommand('buildInfo').debug;
-    if (debugBuild) {
-        jsTestLog("Skipping telemetry store write errors test because debug build will tassert.");
-        return;
-    }
-
-    const errorsBefore = testDB.serverStatus().metrics.queryStats.numQueryStatsStoreWriteErrors;
-    assert.eq(errorsBefore, 0);
-    for (let i = 0; i < 5; i++) {
-        // Command should succeed and record the error.
-        let query = {};
-        query["foo" + i] = "bar";
-        coll.aggregate([{$match: query}]).itcount();
-    }
-
-    // Make sure that we recorded a write error for each run.
-    // TODO SERVER-73152 we attempt to write to the telemetry store twice for each aggregate, which
-    // seems wrong.
-    assert.eq(testDB.serverStatus().metrics.queryStats.numQueryStatsStoreWriteErrors, 10);
-}
-
-/**
- * In this configuration, we insert enough entries into the telemetry store to trigger LRU
- * eviction.
- */
-runTestWithMongodOptions({
-    setParameter:
-        {internalQueryConfigureQueryStatsCacheSize: "1MB", internalQueryStatsSamplingRate: -1},
-},
-                         evictionTest,
-                         {resetCacheSize: false});
-/**
- * In this configuration, eviction is triggered only when the telemetry store size is reset.
- * */
-runTestWithMongodOptions({
-    setParameter:
-        {internalQueryConfigureQueryStatsCacheSize: "4MB", internalQueryStatsSamplingRate: -1},
-},
-                         evictionTest,
-                         {resetCacheSize: true});
-
-/**
- * In this configuration, every query is sampled, so no requests should be rate-limited.
- */
-runTestWithMongodOptions({
-    setParameter: {internalQueryStatsSamplingRate: -1},
-},
-                         countRateLimitedRequestsTest,
-                         {samplingRate: 2147483647, numRequests: 20});
-
-/**
- * In this configuration, the sampling rate is set so that some but not all requests are
- * rate-limited.
- */
-runTestWithMongodOptions({
-    setParameter: {internalQueryStatsSamplingRate: 10},
-},
-                         countRateLimitedRequestsTest,
-                         {samplingRate: 10, numRequests: 20});
-
-/**
- * Sample all queries and assert that the size of telemetry store is equal to num entries * entry
- * size
- */
-runTestWithMongodOptions({
-    setParameter: {internalQueryStatsSamplingRate: -1},
-},
-                         telemetryStoreSizeEstimateTest);
-
-/**
- * Use a very small telemetry store size and assert that errors in writing to the telemetry store
- * are tracked.
- */
-runTestWithMongodOptions({
-    setParameter: {
-        internalQueryConfigureQueryStatsCacheSize: "0.00001MB",
-        internalQueryStatsSamplingRate: -1
-    },
-},
-                         telemetryStoreWriteErrorsTest);
-}());
diff --git a/jstests/noPassthrough/queryStats/query_stats_upgrade.js b/jstests/noPassthrough/queryStats/query_stats_upgrade.js
deleted file mode 100644
index 919d9f87baf..00000000000
--- a/jstests/noPassthrough/queryStats/query_stats_upgrade.js
+++ /dev/null
@@ -1,43 +0,0 @@
-/**
- * Test that telemetry doesn't work on a lower FCV version but works after an FCV upgrade.
- * @tags: [featureFlagQueryStats]
- */
-load('jstests/libs/analyze_plan.js');
-load("jstests/libs/feature_flag_util.js");
-
-(function() {
-"use strict";
-
-const dbpath = MongoRunner.dataPath + jsTestName();
-let conn = MongoRunner.runMongod({dbpath: dbpath});
-let testDB = conn.getDB(jsTestName());
-// This test should only be run with the flag enabled.
-assert(FeatureFlagUtil.isEnabled(testDB, "QueryStats"));
-
-function testLower(restart = false) {
-    let adminDB = conn.getDB("admin");
-    assert.commandWorked(adminDB.runCommand(
-        {setFeatureCompatibilityVersion: binVersionToFCV("last-lts"), confirm: true}));
-    if (restart) {
-        MongoRunner.stopMongod(conn);
-        conn = MongoRunner.runMongod({dbpath: dbpath, noCleanData: true});
-        testDB = conn.getDB(jsTestName());
-        adminDB = conn.getDB("admin");
-    }
-
-    assert.commandFailedWithCode(
-        testDB.adminCommand({aggregate: 1, pipeline: [{$queryStats: {}}], cursor: {}}), 6579000);
-
-    // Upgrade FCV.
-    assert.commandWorked(adminDB.runCommand(
-        {setFeatureCompatibilityVersion: binVersionToFCV("latest"), confirm: true}));
-
-    // We should be able to run a telemetry pipeline now that the FCV is correct.
-    assert.commandWorked(
-        testDB.adminCommand({aggregate: 1, pipeline: [{$queryStats: {}}], cursor: {}}),
-    );
-}
-testLower(true);
-testLower(false);
-MongoRunner.stopMongod(conn);
-})();
diff --git a/jstests/noPassthrough/queryStats/redact_queries_with_nonobject_fields.js b/jstests/noPassthrough/queryStats/redact_queries_with_nonobject_fields.js
deleted file mode 100644
index 7528ab9a4ab..00000000000
--- a/jstests/noPassthrough/queryStats/redact_queries_with_nonobject_fields.js
+++ /dev/null
@@ -1,76 +0,0 @@
-/**
- * Test that telemetry key generation works for queries with non-object fields.
- * @tags: [featureFlagQueryStats]
- */
-load('jstests/libs/analyze_plan.js');
-
-(function() {
-"use strict";
-
-// Turn on the collecting of telemetry metrics.
-let options = {
-    setParameter: {internalQueryStatsSamplingRate: -1},
-};
-
-const conn = MongoRunner.runMongod(options);
-const testDB = conn.getDB('test');
-var collA = testDB[jsTestName()];
-var collB = testDB[jsTestName() + 'Two'];
-collA.drop();
-collB.drop();
-
-for (var i = 0; i < 200; i++) {
-    collA.insert({foo: 0, bar: Math.floor(Math.random() * 3)});
-    collA.insert({foo: 1, bar: Math.floor(Math.random() * -2)});
-    collB.insert({foo: Math.floor(Math.random() * 2), bar: Math.floor(Math.random() * 2)});
-}
-
-function confirmAggSuccess(collName, pipeline) {
-    const command = {aggregate: collName, cursor: {}};
-    command.pipeline = pipeline;
-    assert.commandWorked(testDB.runCommand(command));
-}
-// Test with non-object fields $limit and $skip.
-confirmAggSuccess(collA.getName(), [{$sort: {bar: -1}}, {$limit: 2}, {$match: {foo: {$lte: 2}}}]);
-confirmAggSuccess(collA.getName(), [{$sort: {bar: -1}}, {$skip: 50}, {$match: {foo: {$lte: 2}}}]);
-confirmAggSuccess(collA.getName(),
-                  [{$sort: {bar: -1}}, {$limit: 2}, {$skip: 50}, {$match: {foo: 0}}]);
-
-// Test non-object field, $unionWith.
-confirmAggSuccess(collA.getName(), [{$unionWith: collB.getName()}]);
-
-// Test $limit in $setWindowFields for good measure.
-confirmAggSuccess(collA.getName(), [
-    {$_internalInhibitOptimization: {}},
-    {
-        $setWindowFields: {
-            sortBy: {foo: 1},
-            output: {sum: {$sum: "$bar", window: {documents: ["unbounded", "current"]}}}
-        }
-    },
-    {$sort: {foo: 1}},
-    {$limit: 5}
-]);
-// Test find commands containing non-object fields
-assert.commandWorked(testDB.runCommand({find: collA.getName(), limit: 20}));
-assert.commandWorked(testDB.runCommand({find: collA.getName(), skip: 199}));
-collA.find().skip(100);
-
-// findOne has a nonobject field, $limit.
-collB.findOne();
-collB.findOne({foo: 1});
-
-// Test non-object field $unwind
-confirmAggSuccess(
-    collA.getName(), [{
-        "$facet": {
-            "productOfJoin": [
-                {"$lookup": {"from": collB.getName(), "pipeline": [{"$match": {}}], "as": "join"}},
-                {"$unwind": "$join"},
-                {"$project": {"str": 1}}
-            ]
-        }
-    }]);
-
-MongoRunner.stopMongod(conn);
-}());
diff --git a/jstests/noPassthrough/telemetry/application_name_find.js b/jstests/noPassthrough/telemetry/application_name_find.js
new file mode 100644
index 00000000000..35b86a95f53
--- /dev/null
+++ b/jstests/noPassthrough/telemetry/application_name_find.js
@@ -0,0 +1,39 @@
+/**
+ * Test that applicationName and namespace appear in telemetry for the find command.
+ * @tags: [featureFlagTelemetry]
+ */
+load("jstests/libs/telemetry_utils.js");
+(function() {
+"use strict";
+
+const kApplicationName = "MongoDB Shell";
+const kHashedCollName = "w6Ax20mVkbJu4wQWAMjL8Sl+DfXAr2Zqdc3kJRB7Oo0=";
+const kHashedFieldName = "lU7Z0mLRPRUL+RfAD5jhYPRRpXBsZBxS/20EzDwfOG4=";
+
+// Turn on the collecting of telemetry metrics.
+let options = {
+    setParameter: {internalQueryConfigureTelemetrySamplingRate: -1},
+};
+
+const conn = MongoRunner.runMongod(options);
+conn.setLogLevel(3, "query");
+const testDB = conn.getDB('test');
+var coll = testDB[jsTestName()];
+coll.drop();
+
+coll.insert({v: 1});
+coll.insert({v: 2});
+coll.insert({v: 3});
+
+coll.find({v: 1}).toArray();
+
+let telemetry = getTelemetry(conn);
+assert.eq(1, telemetry.length, telemetry);
+assert.eq(kApplicationName, telemetry[0].key.applicationName, telemetry);
+
+telemetry = getTelemetryRedacted(conn, true);
+assert.eq(1, telemetry.length, telemetry);
+assert.eq(kApplicationName, telemetry[0].key.applicationName, telemetry);
+
+MongoRunner.stopMongod(conn);
+}());
diff --git a/jstests/noPassthrough/telemetry/clear_telemetry_store.js b/jstests/noPassthrough/telemetry/clear_telemetry_store.js
new file mode 100644
index 00000000000..b2409cc0bbb
--- /dev/null
+++ b/jstests/noPassthrough/telemetry/clear_telemetry_store.js
@@ -0,0 +1,46 @@
+/**
+ * Test that the telemetry store can be cleared when the cache size is reset to 0.
+ * @tags: [featureFlagTelemetry]
+ */
+load("jstests/libs/telemetry_utils.js");  // For verifyMetrics.
+
+(function() {
+"use strict";
+
+// Turn on the collecting of telemetry metrics.
+let options = {
+    setParameter: {
+        internalQueryConfigureTelemetrySamplingRate: -1,
+        internalQueryConfigureTelemetryCacheSize: "10MB"
+    },
+};
+
+const conn = MongoRunner.runMongod(options);
+const testDB = conn.getDB('test');
+var coll = testDB[jsTestName()];
+coll.drop();
+
+let query = {};
+for (var j = 0; j < 10; ++j) {
+    query["foo.field.xyz." + j] = 1;
+    query["bar.field.xyz." + j] = 2;
+    query["baz.field.xyz." + j] = 3;
+    coll.aggregate([{$match: query}]).itcount();
+}
+
+// Confirm number of entries in the store and that none have been evicted.
+let telemetryResults = testDB.getSiblingDB("admin").aggregate([{$telemetry: {}}]).toArray(); +assert.eq(telemetryResults.length, 10, telemetryResults); +assert.eq(testDB.serverStatus().metrics.telemetry.numEvicted, 0); + +// Command to clear the cache. +assert.commandWorked( + testDB.adminCommand({setParameter: 1, internalQueryConfigureTelemetryCacheSize: "0MB"})); + +// 10 regular queries plus the $telemetry query, means 11 entries evicted when the cache is cleared. +assert.eq(testDB.serverStatus().metrics.telemetry.numEvicted, 11); + +// Calling $telemetry should fail when the telemetry store size is 0 bytes. +assert.throwsWithCode(() => testDB.getSiblingDB("admin").aggregate([{$telemetry: {}}]), 6579000); +MongoRunner.stopMongod(conn); +}()); diff --git a/jstests/noPassthrough/telemetry/documentSourceTelemetry_redaction_parameters.js b/jstests/noPassthrough/telemetry/documentSourceTelemetry_redaction_parameters.js new file mode 100644 index 00000000000..c4f785abf6a --- /dev/null +++ b/jstests/noPassthrough/telemetry/documentSourceTelemetry_redaction_parameters.js @@ -0,0 +1,100 @@ +/** + * Test the $telemetry hmac properties. + * @tags: [featureFlagTelemetry] + */ + +load("jstests/aggregation/extras/utils.js"); // For assertAdminDBErrCodeAndErrMsgContains. +load("jstests/libs/telemetry_utils.js"); + +(function() { +"use strict"; + +// Assert the expected telemetry key with no hmac. +function assertTelemetryKeyWithoutHmac(telemetryKey) { + assert.eq(telemetryKey.filter, {"foo": {"$lte": "?number"}}); + assert.eq(telemetryKey.sort, {"bar": -1}); + assert.eq(telemetryKey.limit, "?number"); +} + +function runTest(conn) { + const testDB = conn.getDB('test'); + var coll = testDB[jsTestName()]; + coll.drop(); + + coll.insert({foo: 1}); + coll.find({foo: {$lte: 2}}).sort({bar: -1}).limit(2).toArray(); + // Default is no hmac. + assertTelemetryKeyWithoutHmac(getTelemetry(conn)[0].key.queryShape); + + // Turning on hmac should apply hmac to all field names on all entries, even previously cached + // ones. + const telemetryKey = getTelemetryRedacted(conn)[0]["key"]; + assert.eq(telemetryKey.queryShape.filter, + {"fNWkKfogMv6MJ77LpBcuPrO7Nq+R+7TqtD+Lgu3Umc4=": {"$lte": "?number"}}); + assert.eq(telemetryKey.queryShape.sort, {"CDDQIXZmDehLKmQcRxtdOQjMqoNqfI2nGt2r4CgJ52o=": -1}); + assert.eq(telemetryKey.queryShape.limit, "?number"); + + // Turning hmac back off should preserve field names on all entries, even previously cached + // ones. + assertTelemetryKeyWithoutHmac(getTelemetry(conn)[0]["key"].queryShape); + + // Explicitly set applyHmacToIdentifiers to false. + assertTelemetryKeyWithoutHmac(getTelemetryRedacted(conn, false)[0]["key"].queryShape); + + // Wrong parameter name throws error. + let pipeline = [{$telemetry: {redactFields: true}}]; + assertAdminDBErrCodeAndErrMsgContains( + coll, + pipeline, + ErrorCodes.FailedToParse, + "$telemetry parameters object may only contain 'applyHmacToIdentifiers' or 'hmacKey' options. Found: redactFields"); + + // Wrong parameter type throws error. + pipeline = [{$telemetry: {applyHmacToIdentifiers: 1}}]; + assertAdminDBErrCodeAndErrMsgContains( + coll, + pipeline, + ErrorCodes.FailedToParse, + "$telemetry applyHmacToIdentifiers parameter must be boolean. Found type: double"); + + pipeline = [{$telemetry: {hmacKey: 1}}]; + assertAdminDBErrCodeAndErrMsgContains( + coll, + pipeline, + ErrorCodes.FailedToParse, + "$telemetry hmacKey parameter must be bindata of length 32 or greater. 
Found type: double");
+
+    // Parameter object with unrecognized key throws error.
+    pipeline = [{$telemetry: {applyHmacToIdentifiers: true, hmacStrategy: "on"}}];
+    assertAdminDBErrCodeAndErrMsgContains(
+        coll,
+        pipeline,
+        ErrorCodes.FailedToParse,
+        "$telemetry parameters object may only contain 'applyHmacToIdentifiers' or 'hmacKey' options. Found: hmacStrategy");
+}
+
+const conn = MongoRunner.runMongod({
+    setParameter: {
+        internalQueryConfigureTelemetrySamplingRate: -1,
+        featureFlagTelemetry: true,
+    }
+});
+runTest(conn);
+MongoRunner.stopMongod(conn);
+
+const st = new ShardingTest({
+    mongos: 1,
+    shards: 1,
+    config: 1,
+    rs: {nodes: 1},
+    mongosOptions: {
+        setParameter: {
+            internalQueryConfigureTelemetrySamplingRate: -1,
+            featureFlagTelemetry: true,
+            'failpoint.skipClusterParameterRefresh': "{'mode':'alwaysOn'}"
+        }
+    },
+});
+runTest(st.s);
+st.stop();
+}());
diff --git a/jstests/noPassthrough/telemetry/feature_flag_off_sampling_rate_on.js b/jstests/noPassthrough/telemetry/feature_flag_off_sampling_rate_on.js
new file mode 100644
index 00000000000..7fbc079cc7b
--- /dev/null
+++ b/jstests/noPassthrough/telemetry/feature_flag_off_sampling_rate_on.js
@@ -0,0 +1,54 @@
+/**
+ * Test that calls to read from telemetry store fail when feature flag is turned off and sampling
+ * rate > 0.
+ */
+load('jstests/libs/analyze_plan.js');
+load("jstests/libs/feature_flag_util.js");
+
+(function() {
+"use strict";
+
+// Set sampling rate to -1.
+let options = {
+    setParameter: {internalQueryConfigureTelemetrySamplingRate: -1},
+};
+const conn = MongoRunner.runMongod(options);
+const testdb = conn.getDB('test');
+
+// This test specifically tests error handling when the feature flag is not on.
+// TODO SERVER-65800 This test can be deleted when the feature is on by default.
+if (!conn || FeatureFlagUtil.isEnabled(testdb, "Telemetry")) {
+    jsTestLog(`Skipping test since the feature flag is enabled or the connection failed. conn: ${conn}`);
+    if (conn) {
+        MongoRunner.stopMongod(conn);
+    }
+    return;
+}
+
+var coll = testdb[jsTestName()];
+coll.drop();
+
+// Bulk insert documents to reduce roundtrips and make a timeout on a slow machine less likely.
+const bulk = coll.initializeUnorderedBulkOp();
+for (let i = 1; i <= 20; i++) {
+    bulk.insert({foo: 0, bar: Math.floor(Math.random() * 3)});
+}
+assert.commandWorked(bulk.execute());
+
+// Pipeline to read telemetry store should fail without the feature flag turned on even though
+// the sampling rate is > 0.
+assert.commandFailedWithCode(
+    testdb.adminCommand({aggregate: 1, pipeline: [{$telemetry: {}}], cursor: {}}),
+    ErrorCodes.QueryFeatureNotAllowed);
+
+// A pipeline with a filter to read the telemetry store also fails without the feature flag
+// turned on, even though the sampling rate is > 0.
+assert.commandFailedWithCode(testdb.adminCommand({
+    aggregate: 1,
+    pipeline: [{$telemetry: {}}, {$match: {"key.queryShape.find": {$eq: "###"}}}],
+    cursor: {}
+}),
+                             ErrorCodes.QueryFeatureNotAllowed);
+
+MongoRunner.stopMongod(conn);
+}());
diff --git a/jstests/noPassthrough/telemetry/query_stats_key.js b/jstests/noPassthrough/telemetry/query_stats_key.js
new file mode 100644
index 00000000000..68d77110bc6
--- /dev/null
+++ b/jstests/noPassthrough/telemetry/query_stats_key.js
@@ -0,0 +1,111 @@
+/**
+ * This test confirms that telemetry store key fields are properly nested and none are missing.
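+ * (Command fields nest under 'queryShape', while cursor- and client-level fields such as
+ * batchSize and applicationName sit at the top level of the key.)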
+ * @tags: [featureFlagTelemetry] + */ +load("jstests/libs/telemetry_utils.js"); +(function() { +"use strict"; + +function confirmAllFieldsPresent(queryStatsEntries) { + const kApplicationName = "MongoDB Shell"; + const queryShapeFindFields = [ + "cmdNs", + "command", + "filter", + "sort", + "projection", + "hint", + "skip", + "limit", + "singleBatch", + "max", + "min", + "returnKey", + "showRecordId", + "tailable", + "oplogReplay", + "awaitData", + "collation", + "allowDiskUse", + "let" + ]; + + // The outer fields not nested inside queryShape. + const queryStatsKeyFields = [ + "queryShape", + "batchSize", + "comment", + "maxTimeMS", + "noCursorTimeout", + "readConcern", + "allowPartialResults", + "applicationName" + ]; + + for (const entry of queryStatsEntries) { + let fieldCounter = 0; + assert.eq(entry.key.queryShape.command, "find"); + assert.eq(entry.key.applicationName, kApplicationName); + + for (const field in entry.key.queryShape) { + assert(queryShapeFindFields.includes(field)); + fieldCounter++; + } + assert.eq(fieldCounter, queryShapeFindFields.length); + + fieldCounter = 0; + for (const field in entry.key) { + assert(queryStatsKeyFields.includes(field)); + fieldCounter++; + } + assert.eq(fieldCounter, queryStatsKeyFields.length); + } +} + +// Turn on the collecting of telemetry metrics. +let options = { + setParameter: {internalQueryConfigureTelemetrySamplingRate: -1}, +}; + +const conn = MongoRunner.runMongod(options); +const testDB = conn.getDB('test'); +var coll = testDB[jsTestName()]; +coll.drop(); + +// Have to create an index for hint not to fail. +assert.commandWorked(coll.createIndex({v: 1})); + +let commandObj = { + find: coll.getName(), + filter: {v: {$eq: 2}}, + oplogReplay: true, + comment: "this is a test!!", + min: {"v": 0}, + max: {"v": 4}, + hint: {"v": 1}, + sort: {a: -1}, + returnKey: false, + noCursorTimeout: true, + showRecordId: false, + tailable: false, + awaitData: false, + allowPartialResults: true, + skip: 1, + limit: 2, + maxTimeMS: 500, + collation: {locale: "en_US", strength: 2}, + allowDiskUse: true, + readConcern: {level: "local"}, + batchSize: 2, + singleBatch: true, + let : {}, + projection: {_id: 0}, +}; + +assert.commandWorked(testDB.runCommand(commandObj)); +let telemetry = getTelemetry(conn); +assert.eq(1, telemetry.length); +confirmAllFieldsPresent(telemetry); + +MongoRunner.stopMongod(conn); +}()); diff --git a/jstests/noPassthrough/telemetry/redact_queries_with_nonobject_fields.js b/jstests/noPassthrough/telemetry/redact_queries_with_nonobject_fields.js new file mode 100644 index 00000000000..25cac47555e --- /dev/null +++ b/jstests/noPassthrough/telemetry/redact_queries_with_nonobject_fields.js @@ -0,0 +1,76 @@ +/** + * Test that telemetry key generation works for queries with non-object fields. + * @tags: [featureFlagTelemetry] + */ +load('jstests/libs/analyze_plan.js'); + +(function() { +"use strict"; + +// Turn on the collecting of telemetry metrics. 
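+// (A sampling rate of -1 samples every request, so the single find below is guaranteed to be
+// recorded in the store.)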
+let options = { + setParameter: {internalQueryConfigureTelemetrySamplingRate: -1}, +}; + +const conn = MongoRunner.runMongod(options); +const testDB = conn.getDB('test'); +var collA = testDB[jsTestName()]; +var collB = testDB[jsTestName() + 'Two']; +collA.drop(); +collB.drop(); + +for (var i = 0; i < 200; i++) { + collA.insert({foo: 0, bar: Math.floor(Math.random() * 3)}); + collA.insert({foo: 1, bar: Math.floor(Math.random() * -2)}); + collB.insert({foo: Math.floor(Math.random() * 2), bar: Math.floor(Math.random() * 2)}); +} + +function confirmAggSuccess(collName, pipeline) { + const command = {aggregate: collName, cursor: {}}; + command.pipeline = pipeline; + assert.commandWorked(testDB.runCommand(command)); +} +// Test with non-object fields $limit and $skip. +confirmAggSuccess(collA.getName(), [{$sort: {bar: -1}}, {$limit: 2}, {$match: {foo: {$lte: 2}}}]); +confirmAggSuccess(collA.getName(), [{$sort: {bar: -1}}, {$skip: 50}, {$match: {foo: {$lte: 2}}}]); +confirmAggSuccess(collA.getName(), + [{$sort: {bar: -1}}, {$limit: 2}, {$skip: 50}, {$match: {foo: 0}}]); + +// Test non-object field, $unionWith. +confirmAggSuccess(collA.getName(), [{$unionWith: collB.getName()}]); + +// Test $limit in $setWindowFields for good measure. +confirmAggSuccess(collA.getName(), [ + {$_internalInhibitOptimization: {}}, + { + $setWindowFields: { + sortBy: {foo: 1}, + output: {sum: {$sum: "$bar", window: {documents: ["unbounded", "current"]}}} + } + }, + {$sort: {foo: 1}}, + {$limit: 5} +]); +// Test find commands containing non-object fields +assert.commandWorked(testDB.runCommand({find: collA.getName(), limit: 20})); +assert.commandWorked(testDB.runCommand({find: collA.getName(), skip: 199})); +collA.find().skip(100); + +// findOne has a nonobject field, $limit. +collB.findOne(); +collB.findOne({foo: 1}); + +// Test non-object field $unwind +confirmAggSuccess( + collA.getName(), [{ + "$facet": { + "productOfJoin": [ + {"$lookup": {"from": collB.getName(), "pipeline": [{"$match": {}}], "as": "join"}}, + {"$unwind": "$join"}, + {"$project": {"str": 1}} + ] + } + }]); + +MongoRunner.stopMongod(conn); +}()); diff --git a/jstests/noPassthrough/telemetry/telemetry_collect_on_mongos.js b/jstests/noPassthrough/telemetry/telemetry_collect_on_mongos.js new file mode 100644 index 00000000000..ff9fadc85c7 --- /dev/null +++ b/jstests/noPassthrough/telemetry/telemetry_collect_on_mongos.js @@ -0,0 +1,275 @@ +/** + * Test that mongos is collecting telemetry metrics. + * @tags: [featureFlagTelemetry] + */ + +load('jstests/libs/telemetry_utils.js'); + +(function() { +"use strict"; + +// Redacted literal replacement string. This may change in the future, so it's factored out. 
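+// (The telemetry keys asserted below use it in place of literals, e.g.
+// {$match: {v: {$gt: "###", $lt: "###"}}}.)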
+const aggRedactString = "###"; +const setup = () => { + const st = new ShardingTest({ + mongos: 1, + shards: 1, + config: 1, + rs: {nodes: 1}, + mongosOptions: { + setParameter: { + internalQueryConfigureTelemetrySamplingRate: -1, + 'failpoint.skipClusterParameterRefresh': "{'mode':'alwaysOn'}" + } + }, + }); + const mongos = st.s; + const db = mongos.getDB("test"); + const coll = db.coll; + coll.insert({v: 1}); + coll.insert({v: 4}); + return st; +}; + +const assertExpectedResults = (results, + expectedTelemetryKey, + expectedExecCount, + expectedDocsReturnedSum, + expectedDocsReturnedMax, + expectedDocsReturnedMin, + expectedDocsReturnedSumOfSq) => { + const {key, metrics} = results; + assert.eq(expectedTelemetryKey, key); + assert.eq(expectedExecCount, metrics.execCount); + assert.docEq({ + sum: NumberLong(expectedDocsReturnedSum), + max: NumberLong(expectedDocsReturnedMax), + min: NumberLong(expectedDocsReturnedMin), + sumOfSquares: NumberLong(expectedDocsReturnedSumOfSq) + }, + metrics.docsReturned); + + // This test can't predict exact timings, so just assert these three fields have been set (are + // non-zero). + const {firstSeenTimestamp, lastExecutionMicros, queryExecMicros} = metrics; + + assert.neq(timestampCmp(firstSeenTimestamp, Timestamp(0, 0)), 0); + assert.neq(lastExecutionMicros, NumberLong(0)); + + const distributionFields = ['sum', 'max', 'min', 'sumOfSquares']; + for (const field of distributionFields) { + assert.neq(queryExecMicros[field], NumberLong(0)); + } +}; + +// Assert that, for find queries, no telemetry results are written until a cursor has reached +// exhaustion; ensure accurate results once they're written. +{ + const st = setup(); + const db = st.s.getDB("test"); + const collName = "coll"; + const coll = db[collName]; + + const telemetryKey = { + queryShape: { + cmdNs: {db: "test", coll: "coll"}, + command: "find", + filter: {$and: [{v: {$gt: "?number"}}, {v: {$lt: "?number"}}]}, + }, + readConcern: {level: "local", provenance: "implicitDefault"}, + batchSize: "?number", + applicationName: "MongoDB Shell", + }; + + const cursor = coll.find({v: {$gt: 0, $lt: 5}}).batchSize(1); // returns 1 doc + + // Since the cursor hasn't been exhausted yet, ensure no telemetry results have been written + // yet. + let telemetry = getTelemetry(db); + assert.eq(0, telemetry.length, telemetry); + + // Run a getMore to exhaust the cursor, then ensure telemetry results have been written + // accurately. batchSize must be 2 so the cursor recognizes exhaustion. + assert.commandWorked(db.runCommand({ + getMore: cursor.getId(), + collection: coll.getName(), + batchSize: 2 + })); // returns 1 doc, exhausts the cursor + // The $telemetry query for the previous `getTelemetry` is included in this call to $telemetry. + telemetry = getTelemetry(db); + assert.eq(2, telemetry.length, telemetry); + assertExpectedResults(telemetry[0], + telemetryKey, + /* expectedExecCount */ 1, + /* expectedDocsReturnedSum */ 2, + /* expectedDocsReturnedMax */ 2, + /* expectedDocsReturnedMin */ 2, + /* expectedDocsReturnedSumOfSq */ 4); + + // Run more queries (to exhaustion) with the same query shape, and ensure telemetry results are + // accurate. 
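+    // (The three runs below return 0, 0, and 1 documents on top of the earlier 2, so the totals
+    // become sum 3, min 0, max 2, and sumOfSquares 4 + 1 = 5.)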
+ coll.find({v: {$gt: 2, $lt: 3}}).batchSize(10).toArray(); // returns 0 docs + coll.find({v: {$gt: 0, $lt: 1}}).batchSize(10).toArray(); // returns 0 docs + coll.find({v: {$gt: 0, $lt: 2}}).batchSize(10).toArray(); // return 1 doc + telemetry = getTelemetry(db); + assert.eq(2, telemetry.length, telemetry); + assertExpectedResults(telemetry[0], + telemetryKey, + /* expectedExecCount */ 4, + /* expectedDocsReturnedSum */ 3, + /* expectedDocsReturnedMax */ 2, + /* expectedDocsReturnedMin */ 0, + /* expectedDocsReturnedSumOfSq */ 5); + + st.stop(); +} + +// Assert that, for agg queries, no telemetry results are written until a cursor has reached +// exhaustion; ensure accurate results once they're written. +{ + const st = setup(); + const db = st.s.getDB("test"); + const coll = db.coll; + + const telemetryKey = { + pipeline: [ + {$match: {v: {$gt: aggRedactString, $lt: aggRedactString}}}, + {$project: {hello: aggRedactString}}, + ], + namespace: "test.coll", + applicationName: "MongoDB Shell" + }; + + const cursor = coll.aggregate( + [ + {$match: {v: {$gt: 0, $lt: 5}}}, + {$project: {hello: "$world"}}, + ], + {cursor: {batchSize: 1}}); // returns 1 doc + + // Since the cursor hasn't been exhausted yet, ensure no telemetry results have been written + // yet. + let telemetry = getTelemetry(db); + assert.eq(0, telemetry.length, telemetry); + + // Run a getMore to exhaust the cursor, then ensure telemetry results have been written + // accurately. batchSize must be 2 so the cursor recognizes exhaustion. + assert.commandWorked(db.runCommand({ + getMore: cursor.getId(), + collection: coll.getName(), + batchSize: 2 + })); // returns 1 doc, exhausts the cursor + // The $telemetry query for the previous `getTelemetry` is included in this call to $telemetry. + telemetry = getTelemetry(db); + assert.eq(2, telemetry.length, telemetry); + assertExpectedResults(telemetry[0], + telemetryKey, + /* expectedExecCount */ 1, + /* expectedDocsReturnedSum */ 2, + /* expectedDocsReturnedMax */ 2, + /* expectedDocsReturnedMin */ 2, + /* expectedDocsReturnedSumOfSq */ 4); + + // Run more queries (to exhaustion) with the same query shape, and ensure telemetry results are + // accurate. + coll.aggregate([ + {$match: {v: {$gt: 0, $lt: 5}}}, + {$project: {hello: "$world"}}, + ]); // returns 2 docs + coll.aggregate([ + {$match: {v: {$gt: 2, $lt: 3}}}, + {$project: {hello: "$universe"}}, + ]); // returns 0 docs + coll.aggregate([ + {$match: {v: {$gt: 0, $lt: 2}}}, + {$project: {hello: "$galaxy"}}, + ]); // returns 1 doc + telemetry = getTelemetry(db); + assert.eq(2, telemetry.length, telemetry); + assertExpectedResults(telemetry[0], + telemetryKey, + /* expectedExecCount */ 4, + /* expectedDocsReturnedSum */ 5, + /* expectedDocsReturnedMax */ 2, + /* expectedDocsReturnedMin */ 0, + /* expectedDocsReturnedSumOfSq */ 9); + + st.stop(); +} + +// Assert on batchSize-limited find queries that killCursors will write metrics with partial results +// to the telemetry store. 
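+// (Each cursor returns a single one-document batch before being killed, so execCount is 2 and
+// docsReturned sums to 2, with max and min both 1.)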
+{ + const st = setup(); + const db = st.s.getDB("test"); + const collName = "coll"; + const coll = db[collName]; + + const telemetryKey = { + queryShape: { + cmdNs: {db: "test", coll: "coll"}, + command: "find", + filter: {$and: [{v: {$gt: "?number"}}, {v: {$lt: "?number"}}]}, + }, + readConcern: {level: "local", provenance: "implicitDefault"}, + batchSize: "?number", + applicationName: "MongoDB Shell" + }; + + const cursor1 = coll.find({v: {$gt: 0, $lt: 5}}).batchSize(1); // returns 1 doc + const cursor2 = coll.find({v: {$gt: 0, $lt: 2}}).batchSize(1); // returns 1 doc + + assert.commandWorked( + db.runCommand({killCursors: coll.getName(), cursors: [cursor1.getId(), cursor2.getId()]})); + + const telemetry = getTelemetry(db); + assert.eq(1, telemetry.length); + assertExpectedResults(telemetry[0], + telemetryKey, + /* expectedExecCount */ 2, + /* expectedDocsReturnedSum */ 2, + /* expectedDocsReturnedMax */ 1, + /* expectedDocsReturnedMin */ 1, + /* expectedDocsReturnedSumOfSq */ 2); + st.stop(); +} + +// Assert on batchSize-limited agg queries that killCursors will write metrics with partial results +// to the telemetry store. +{ + const st = setup(); + const db = st.s.getDB("test"); + const coll = db.coll; + + const telemetryKey = { + pipeline: [{$match: {v: {$gt: aggRedactString, $lt: aggRedactString}}}], + namespace: `test.${coll.getName()}`, + applicationName: "MongoDB Shell" + }; + + const cursor1 = coll.aggregate( + [ + {$match: {v: {$gt: 0, $lt: 5}}}, + ], + {cursor: {batchSize: 1}}); // returns 1 doc + const cursor2 = coll.aggregate( + [ + {$match: {v: {$gt: 0, $lt: 2}}}, + ], + {cursor: {batchSize: 1}}); // returns 1 doc + + assert.commandWorked( + db.runCommand({killCursors: coll.getName(), cursors: [cursor1.getId(), cursor2.getId()]})); + + const telemetry = getTelemetry(db); + assert.eq(1, telemetry.length); + assertExpectedResults(telemetry[0], + telemetryKey, + /* expectedExecCount */ 2, + /* expectedDocsReturnedSum */ 2, + /* expectedDocsReturnedMax */ 1, + /* expectedDocsReturnedMin */ 1, + /* expectedDocsReturnedSumOfSq */ 2); + st.stop(); +} +}()); diff --git a/jstests/noPassthrough/telemetry/telemetry_feature_flag.js b/jstests/noPassthrough/telemetry/telemetry_feature_flag.js new file mode 100644 index 00000000000..4071b732796 --- /dev/null +++ b/jstests/noPassthrough/telemetry/telemetry_feature_flag.js @@ -0,0 +1,34 @@ +/** + * Test that calls to read from telemetry store fail when feature flag is turned off. + */ +load('jstests/libs/analyze_plan.js'); +load("jstests/libs/feature_flag_util.js"); + +(function() { +"use strict"; + +// This test specifically tests error handling when the feature flag is not on. +// TODO SERVER-65800 this test can be removed when the feature flag is removed. +const conn = MongoRunner.runMongod(); +const testDB = conn.getDB('test'); +if (FeatureFlagUtil.isEnabled(testDB, "Telemetry")) { + jsTestLog("Skipping test since telemetry is enabled."); + MongoRunner.stopMongod(conn); + return; +} + +// Pipeline to read telemetry store should fail without feature flag turned on. +assert.commandFailedWithCode( + testDB.adminCommand({aggregate: 1, pipeline: [{$telemetry: {}}], cursor: {}}), + ErrorCodes.QueryFeatureNotAllowed); + +// Pipeline, with a filter, to read telemetry store fails without feature flag turned on. 
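+// (The trailing $match makes no difference; parsing $telemetry itself is what fails while the
+// flag is off.)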
+assert.commandFailedWithCode(testDB.adminCommand({
+    aggregate: 1,
+    pipeline: [{$telemetry: {}}, {$match: {"key.queryShape.find": {$eq: "###"}}}],
+    cursor: {}
+}),
+                             ErrorCodes.QueryFeatureNotAllowed);
+
+MongoRunner.stopMongod(conn);
+}());
diff --git a/jstests/noPassthrough/telemetry/telemetry_metrics_across_getMore_calls.js b/jstests/noPassthrough/telemetry/telemetry_metrics_across_getMore_calls.js
new file mode 100644
index 00000000000..91605c5e069
--- /dev/null
+++ b/jstests/noPassthrough/telemetry/telemetry_metrics_across_getMore_calls.js
@@ -0,0 +1,159 @@
+/**
+ * Test that the telemetry metrics are aggregated properly by distinct query shape over getMore
+ * calls.
+ * @tags: [featureFlagTelemetry]
+ */
+load("jstests/libs/telemetry_utils.js");  // For verifyMetrics.
+
+(function() {
+"use strict";
+
+// Turn on the collecting of telemetry metrics.
+let options = {
+    setParameter: {internalQueryConfigureTelemetrySamplingRate: -1},
+};
+
+const conn = MongoRunner.runMongod(options);
+const testDB = conn.getDB('test');
+var coll = testDB[jsTestName()];
+coll.drop();
+
+// Bulk insert documents to reduce roundtrips and make a timeout on a slow machine less likely.
+const bulk = coll.initializeUnorderedBulkOp();
+const numDocs = 100;
+for (let i = 0; i < numDocs / 2; ++i) {
+    bulk.insert({foo: 0, bar: Math.floor(Math.random() * 3)});
+    bulk.insert({foo: 1, bar: Math.floor(Math.random() * -2)});
+}
+assert.commandWorked(bulk.execute());
+
+// Assert that two queries with identical structures are represented by the same key.
+{
+    // Note: toArray() is necessary for the batchSize-limited query to run to cursor exhaustion
+    // (when it writes to the telemetry store).
+    coll.aggregate([{$match: {foo: 1}}], {cursor: {batchSize: 2}}).toArray();
+    coll.aggregate([{$match: {foo: 0}}], {cursor: {batchSize: 2}}).toArray();
+
+    // This command will return all telemetry store entries.
+    const telemetryResults = testDB.getSiblingDB("admin").aggregate([{$telemetry: {}}]).toArray();
+    // Assert there is only one entry.
+    assert.eq(telemetryResults.length, 1, telemetryResults);
+    const telemetryEntry = telemetryResults[0];
+    assert.eq(telemetryEntry.key.namespace, `test.${jsTestName()}`);
+    assert.eq(telemetryEntry.key.applicationName, "MongoDB Shell");
+
+    // Assert we update execution count for identically shaped queries.
+    assert.eq(telemetryEntry.metrics.execCount, 2);
+
+    // Assert telemetry values are accurate for the two queries above.
+    assert.eq(telemetryEntry.metrics.docsReturned.sum, numDocs);
+    assert.eq(telemetryEntry.metrics.docsReturned.min, numDocs / 2);
+    assert.eq(telemetryEntry.metrics.docsReturned.max, numDocs / 2);
+
+    verifyMetrics(telemetryResults);
+}
+
+const fooEqBatchSize = 5;
+const fooNeBatchSize = 3;
+// Assert on batchSize-limited queries that killCursors will write metrics with partial results to
+// the telemetry store.
+{
+    let cursor1 = coll.find({foo: {$eq: 0}}).batchSize(fooEqBatchSize);
+    let cursor2 = coll.find({foo: {$ne: 0}}).batchSize(fooNeBatchSize);
+    // Issue one getMore for the first query, so 2 * fooEqBatchSize documents are returned total.
+    assert.commandWorked(testDB.runCommand(
+        {getMore: cursor1.getId(), collection: coll.getName(), batchSize: fooEqBatchSize}));
+
+    // Kill both cursors so the telemetry metrics are stored.
+    assert.commandWorked(testDB.runCommand(
+        {killCursors: coll.getName(), cursors: [cursor1.getId(), cursor2.getId()]}));
+
+    // This filters telemetry entries to just the ones entered when running the above find queries.
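+    // ({foo: {$eq: 0}} and {foo: {$ne: 0}} normalize to different shapes, so two entries are
+    // expected, one per filter.)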
+    const telemetryResults = testDB.getSiblingDB("admin")
+                                 .aggregate([
+                                     {$telemetry: {}},
+                                     {$match: {"key.queryShape.filter.foo": {$exists: true}}},
+                                     {$sort: {key: 1}},
+                                 ])
+                                 .toArray();
+    assert.eq(telemetryResults.length, 2, telemetryResults);
+    assert.eq(telemetryResults[0].key.queryShape.cmdNs.db, "test");
+    assert.eq(telemetryResults[0].key.queryShape.cmdNs.coll, jsTestName());
+    assert.eq(telemetryResults[0].key.applicationName, "MongoDB Shell");
+    assert.eq(telemetryResults[1].key.queryShape.cmdNs.db, "test");
+    assert.eq(telemetryResults[1].key.queryShape.cmdNs.coll, jsTestName());
+    assert.eq(telemetryResults[1].key.applicationName, "MongoDB Shell");
+
+    assert.eq(telemetryResults[0].metrics.execCount, 1);
+    assert.eq(telemetryResults[1].metrics.execCount, 1);
+    assert.eq(telemetryResults[0].metrics.docsReturned.sum, fooEqBatchSize * 2);
+    assert.eq(telemetryResults[1].metrics.docsReturned.sum, fooNeBatchSize);
+
+    verifyMetrics(telemetryResults);
+}
+
+// Assert that options such as limit/sort create different keys, and that repeating a query shape
+// ({foo: {$eq}}) aggregates metrics across executions.
+{
+    const query2Limit = 50;
+    coll.find({foo: {$eq: 0}}).batchSize(2).toArray();
+    coll.find({foo: {$eq: 1}}).limit(query2Limit).batchSize(2).toArray();
+    coll.find().sort({"foo": 1}).batchSize(2).toArray();
+    // This filters telemetry entries to just the ones entered when running the above find queries.
+    let telemetryResults =
+        testDB.getSiblingDB("admin")
+            .aggregate([{$telemetry: {}}, {$match: {"key.queryShape.command": "find"}}])
+            .toArray();
+    assert.eq(telemetryResults.length, 4, telemetryResults);
+
+    verifyMetrics(telemetryResults);
+
+    // This filters to just the telemetry for query coll.find().sort({"foo": 1}).batchSize(2).
+    telemetryResults = testDB.getSiblingDB("admin")
                           .aggregate([{$telemetry: {}}, {$match: {"key.queryShape.sort.foo": 1}}])
                           .toArray();
+    assert.eq(telemetryResults.length, 1, telemetryResults);
+    assert.eq(telemetryResults[0].key.queryShape.cmdNs.db, "test");
+    assert.eq(telemetryResults[0].key.queryShape.cmdNs.coll, jsTestName());
+    assert.eq(telemetryResults[0].key.applicationName, "MongoDB Shell");
+    assert.eq(telemetryResults[0].metrics.execCount, 1);
+    assert.eq(telemetryResults[0].metrics.docsReturned.sum, numDocs);
+
+    // This filters to just the telemetry for query coll.find({foo: {$eq:
+    // 1}}).limit(query2Limit).batchSize(2).
+    telemetryResults =
+        testDB.getSiblingDB("admin")
+            .aggregate([{$telemetry: {}}, {$match: {"key.queryShape.limit": '?number'}}])
+            .toArray();
+    assert.eq(telemetryResults.length, 1, telemetryResults);
+    assert.eq(telemetryResults[0].key.queryShape.cmdNs.db, "test");
+    assert.eq(telemetryResults[0].key.queryShape.cmdNs.coll, jsTestName());
+    assert.eq(telemetryResults[0].key.applicationName, "MongoDB Shell");
+    assert.eq(telemetryResults[0].metrics.execCount, 1);
+    assert.eq(telemetryResults[0].metrics.docsReturned.sum, query2Limit);
+
+    // This filters to just the telemetry for query coll.find({foo: {$eq: 0}}).batchSize(2).
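+    // (This shape already ran in the killCursors block above, so execCount is 2 and the sum
+    // includes those 2 * fooEqBatchSize partially-returned documents.)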
+ telemetryResults = testDB.getSiblingDB("admin") + .aggregate([ + {$telemetry: {}}, + { + $match: { + "key.queryShape.filter.foo": {$eq: {$eq: "?number"}}, + "key.queryShape.limit": {$exists: false}, + "key.queryShape.sort": {$exists: false} + } + } + ]) + .toArray(); + assert.eq(telemetryResults.length, 1, telemetryResults); + assert.eq(telemetryResults[0].key.queryShape.cmdNs.db, "test"); + assert.eq(telemetryResults[0].key.queryShape.cmdNs.coll, jsTestName()); + assert.eq(telemetryResults[0].key.applicationName, "MongoDB Shell"); + assert.eq(telemetryResults[0].metrics.execCount, 2); + assert.eq(telemetryResults[0].metrics.docsReturned.sum, numDocs / 2 + 2 * fooEqBatchSize); + assert.eq(telemetryResults[0].metrics.docsReturned.max, numDocs / 2); + assert.eq(telemetryResults[0].metrics.docsReturned.min, 2 * fooEqBatchSize); +} + +MongoRunner.stopMongod(conn); +}()); diff --git a/jstests/noPassthrough/telemetry/telemetry_redact_find_cmd.js b/jstests/noPassthrough/telemetry/telemetry_redact_find_cmd.js new file mode 100644 index 00000000000..54b909adae9 --- /dev/null +++ b/jstests/noPassthrough/telemetry/telemetry_redact_find_cmd.js @@ -0,0 +1,69 @@ +/** + * Test that $telemetry properly applies hmac to find commands, on mongod and mongos. + */ +load("jstests/libs/telemetry_utils.js"); +(function() { +"use strict"; + +const kHashedCollName = "w6Ax20mVkbJu4wQWAMjL8Sl+DfXAr2Zqdc3kJRB7Oo0="; +const kHashedFieldName = "lU7Z0mLRPRUL+RfAD5jhYPRRpXBsZBxS/20EzDwfOG4="; + +function runTest(conn) { + const db = conn.getDB("test"); + const admin = conn.getDB("admin"); + + db.test.drop(); + db.test.insert({v: 1}); + + db.test.find({v: 1}).toArray(); + + let telemetry = getTelemetryRedacted(admin); + + assert.eq(1, telemetry.length); + assert.eq("find", telemetry[0].key.queryShape.command); + assert.eq({[kHashedFieldName]: {$eq: "?number"}}, telemetry[0].key.queryShape.filter); + + db.test.insert({v: 2}); + + const cursor = db.test.find({v: {$gt: 0, $lt: 3}}).batchSize(1); + telemetry = getTelemetryRedacted(admin); + // Cursor isn't exhausted, so there shouldn't be another entry yet. + assert.eq(1, telemetry.length); + + assert.commandWorked( + db.runCommand({getMore: cursor.getId(), collection: db.test.getName(), batchSize: 2})); + + telemetry = getTelemetryRedacted(admin); + assert.eq(2, telemetry.length); + assert.eq("find", telemetry[1].key.queryShape.command); + assert.eq({ + "$and": [{[kHashedFieldName]: {"$gt": "?number"}}, {[kHashedFieldName]: {"$lt": "?number"}}] + }, + telemetry[1].key.queryShape.filter); +} + +const conn = MongoRunner.runMongod({ + setParameter: { + internalQueryConfigureTelemetrySamplingRate: -1, + featureFlagTelemetry: true, + } +}); +runTest(conn); +MongoRunner.stopMongod(conn); + +const st = new ShardingTest({ + mongos: 1, + shards: 1, + config: 1, + rs: {nodes: 1}, + mongosOptions: { + setParameter: { + internalQueryConfigureTelemetrySamplingRate: -1, + featureFlagTelemetry: true, + 'failpoint.skipClusterParameterRefresh': "{'mode':'alwaysOn'}" + } + }, +}); +runTest(st.s); +st.stop(); +}()); diff --git a/jstests/noPassthrough/telemetry/telemetry_sampling_rate.js b/jstests/noPassthrough/telemetry/telemetry_sampling_rate.js new file mode 100644 index 00000000000..1bada398a03 --- /dev/null +++ b/jstests/noPassthrough/telemetry/telemetry_sampling_rate.js @@ -0,0 +1,38 @@ +/** + * Test that calls to read from telemetry store fail when sampling rate is not greater than 0 even + * if feature flag is on. 
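+ * (With the sampling rate at 0 nothing is collected, so reading the store yields an empty batch.)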
+ * @tags: [featureFlagTelemetry] + */ +load('jstests/libs/analyze_plan.js'); + +(function() { +"use strict"; + +let options = { + setParameter: {internalQueryConfigureTelemetrySamplingRate: 0}, +}; + +const conn = MongoRunner.runMongod(options); +const testdb = conn.getDB('test'); +var coll = testdb[jsTestName()]; +coll.drop(); +for (var i = 0; i < 20; i++) { + coll.insert({foo: 0, bar: Math.floor(Math.random() * 3)}); +} + +coll.aggregate([{$match: {foo: 1}}], {cursor: {batchSize: 2}}); + +// Reading telemetry store with a sampling rate of 0 should return 0 documents. +let telStore = testdb.adminCommand({aggregate: 1, pipeline: [{$telemetry: {}}], cursor: {}}); +assert.eq(telStore.cursor.firstBatch.length, 0); + +// Reading telemetry store should work now with a sampling rate of greater than 0. +assert.commandWorked(testdb.adminCommand( + {setParameter: 1, internalQueryConfigureTelemetrySamplingRate: 2147483647})); +coll.aggregate([{$match: {foo: 1}}], {cursor: {batchSize: 2}}); +telStore = assert.commandWorked( + testdb.adminCommand({aggregate: 1, pipeline: [{$telemetry: {}}], cursor: {}})); +assert.eq(telStore.cursor.firstBatch.length, 1); + +MongoRunner.stopMongod(conn); +}()); diff --git a/jstests/noPassthrough/telemetry/telemetry_server_status_metrics.js b/jstests/noPassthrough/telemetry/telemetry_server_status_metrics.js new file mode 100644 index 00000000000..2235d272a9f --- /dev/null +++ b/jstests/noPassthrough/telemetry/telemetry_server_status_metrics.js @@ -0,0 +1,190 @@ +/** + * Test the telemetry related serverStatus metrics. + * @tags: [featureFlagTelemetry] + */ +load('jstests/libs/analyze_plan.js'); + +(function() { +"use strict"; + +function runTestWithMongodOptions(mongodOptions, test, testOptions) { + const conn = MongoRunner.runMongod(mongodOptions); + const testDB = conn.getDB('test'); + const coll = testDB[jsTestName()]; + + test(conn, testDB, coll, testOptions); + + MongoRunner.stopMongod(conn); +} + +/** + * Test serverStatus metric which counts the number of evicted entries. + * + * testOptions must include `resetCacheSize` bool field; e.g., { resetCacheSize : true } + */ +function evictionTest(conn, testDB, coll, testOptions) { + const evictedBefore = testDB.serverStatus().metrics.telemetry.numEvicted; + assert.eq(evictedBefore, 0); + for (var i = 0; i < 4000; i++) { + let query = {}; + query["foo" + i] = "bar"; + coll.aggregate([{$match: query}]).itcount(); + } + if (!testOptions.resetCacheSize) { + const evictedAfter = testDB.serverStatus().metrics.telemetry.numEvicted; + assert.gt(evictedAfter, 0); + return; + } + // Make sure number of evicted entries increases when the cache size is reset, which forces out + // least recently used entries to meet the new, smaller size requirement. + assert.eq(testDB.serverStatus().metrics.telemetry.numEvicted, 0); + assert.commandWorked( + testDB.adminCommand({setParameter: 1, internalQueryConfigureTelemetryCacheSize: "1MB"})); + const evictedAfter = testDB.serverStatus().metrics.telemetry.numEvicted; + assert.gt(evictedAfter, 0); +} + +/** + * Test serverStatus metric which counts the number of requests for which telemetry is not collected + * due to rate-limiting. 
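+ * (A request is counted as rate-limited when sampling keeps it from being recorded in the
+ * telemetry store.)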
+ * + * testOptions must include `samplingRate` and `numRequests` number fields; + * e.g., { samplingRate: 2147483647, numRequests: 20 } + */ +function countRateLimitedRequestsTest(conn, testDB, coll, testOptions) { + const numRateLimitedRequestsBefore = + testDB.serverStatus().metrics.telemetry.numRateLimitedRequests; + assert.eq(numRateLimitedRequestsBefore, 0); + + coll.insert({a: 0}); + + // Running numRequests / 2 times since we dispatch two requests per iteration + for (var i = 0; i < testOptions.numRequests / 2; i++) { + coll.find({a: 0}).toArray(); + coll.aggregate([{$match: {a: 1}}]); + } + + const numRateLimitedRequestsAfter = + testDB.serverStatus().metrics.telemetry.numRateLimitedRequests; + + if (testOptions.samplingRate === 0) { + // Telemetry should not be collected for any requests. + assert.eq(numRateLimitedRequestsAfter, testOptions.numRequests); + } else if (testOptions.samplingRate >= testOptions.numRequests) { + // Telemetry should be collected for all requests. + assert.eq(numRateLimitedRequestsAfter, 0); + } else { + // Telemetry should be collected for some but not all requests. + assert.gt(numRateLimitedRequestsAfter, 0); + assert.lt(numRateLimitedRequestsAfter, testOptions.numRequests); + } +} + +function telemetryStoreSizeEstimateTest(conn, testDB, coll, testOptions) { + assert.eq(testDB.serverStatus().metrics.telemetry.telemetryStoreSizeEstimateBytes, 0); + let halfWayPointSize; + // Only using three digit numbers (eg 100, 101) means the string length will be the same for all + // entries and therefore the key size will be the same for all entries, which makes predicting + // the total size of the store clean and easy. + for (var i = 100; i < 200; i++) { + coll.aggregate([{$match: {["foo" + i]: "bar"}}]).itcount(); + if (i == 150) { + halfWayPointSize = + testDB.serverStatus().metrics.telemetry.telemetryStoreSizeEstimateBytes; + } + } + // Confirm that telemetry store has grown and size is non-zero. + assert.gt(halfWayPointSize, 0); + const fullSize = testDB.serverStatus().metrics.telemetry.telemetryStoreSizeEstimateBytes; + assert.gt(fullSize, 0); + // Make sure the final telemetry store size is twice as much as the halfway point size (+/- 5%) + assert(fullSize >= halfWayPointSize * 1.95 && fullSize <= halfWayPointSize * 2.05, + tojson({fullSize, halfWayPointSize})); +} + +function telemetryStoreWriteErrorsTest(conn, testDB, coll, testOptions) { + const debugBuild = testDB.adminCommand('buildInfo').debug; + if (debugBuild) { + jsTestLog("Skipping telemetry store write errors test because debug build will tassert."); + return; + } + + const errorsBefore = testDB.serverStatus().metrics.telemetry.numTelemetryStoreWriteErrors; + assert.eq(errorsBefore, 0); + for (let i = 0; i < 5; i++) { + // Command should succeed and record the error. + let query = {}; + query["foo" + i] = "bar"; + coll.aggregate([{$match: query}]).itcount(); + } + + // Make sure that we recorded a write error for each run. + // TODO SERVER-73152 we attempt to write to the telemetry store twice for each aggregate, which + // seems wrong. + assert.eq(testDB.serverStatus().metrics.telemetry.numTelemetryStoreWriteErrors, 10); +} + +/** + * In this configuration, we insert enough entries into the telemetry store to trigger LRU + * eviction. 
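+ * (The test issues 4000 distinct query shapes against a 1MB cache, well past its capacity.)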
+ */ +runTestWithMongodOptions({ + setParameter: { + internalQueryConfigureTelemetryCacheSize: "1MB", + internalQueryConfigureTelemetrySamplingRate: -1 + }, +}, + evictionTest, + {resetCacheSize: false}); +/** + * In this configuration, eviction is triggered only when the telemetry store size is reset. + * */ +runTestWithMongodOptions({ + setParameter: { + internalQueryConfigureTelemetryCacheSize: "4MB", + internalQueryConfigureTelemetrySamplingRate: -1 + }, +}, + evictionTest, + {resetCacheSize: true}); + +/** + * In this configuration, every query is sampled, so no requests should be rate-limited. + */ +runTestWithMongodOptions({ + setParameter: {internalQueryConfigureTelemetrySamplingRate: -1}, +}, + countRateLimitedRequestsTest, + {samplingRate: 2147483647, numRequests: 20}); + +/** + * In this configuration, the sampling rate is set so that some but not all requests are + * rate-limited. + */ +runTestWithMongodOptions({ + setParameter: {internalQueryConfigureTelemetrySamplingRate: 10}, +}, + countRateLimitedRequestsTest, + {samplingRate: 10, numRequests: 20}); + +/** + * Sample all queries and assert that the size of telemetry store is equal to num entries * entry + * size + */ +runTestWithMongodOptions({ + setParameter: {internalQueryConfigureTelemetrySamplingRate: -1}, +}, + telemetryStoreSizeEstimateTest); + +/** + * Use a very small telemetry store size and assert that errors in writing to the telemetry store + * are tracked. + */ +runTestWithMongodOptions({ + setParameter: { + internalQueryConfigureTelemetryCacheSize: "0.00001MB", + internalQueryConfigureTelemetrySamplingRate: -1 + }, +}, + telemetryStoreWriteErrorsTest); +}()); diff --git a/jstests/noPassthrough/telemetry/telemetry_upgrade.js b/jstests/noPassthrough/telemetry/telemetry_upgrade.js new file mode 100644 index 00000000000..f396d23b948 --- /dev/null +++ b/jstests/noPassthrough/telemetry/telemetry_upgrade.js @@ -0,0 +1,43 @@ +/** + * Test that telemetry doesn't work on a lower FCV version but works after an FCV upgrade. + * @tags: [featureFlagTelemetry] + */ +load('jstests/libs/analyze_plan.js'); +load("jstests/libs/feature_flag_util.js"); + +(function() { +"use strict"; + +const dbpath = MongoRunner.dataPath + jsTestName(); +let conn = MongoRunner.runMongod({dbpath: dbpath}); +let testDB = conn.getDB(jsTestName()); +// This test should only be run with the flag enabled. +assert(FeatureFlagUtil.isEnabled(testDB, "Telemetry")); + +function testLower(restart = false) { + let adminDB = conn.getDB("admin"); + assert.commandWorked(adminDB.runCommand( + {setFeatureCompatibilityVersion: binVersionToFCV("last-lts"), confirm: true})); + if (restart) { + MongoRunner.stopMongod(conn); + conn = MongoRunner.runMongod({dbpath: dbpath, noCleanData: true}); + testDB = conn.getDB(jsTestName()); + adminDB = conn.getDB("admin"); + } + + assert.commandFailedWithCode( + testDB.adminCommand({aggregate: 1, pipeline: [{$telemetry: {}}], cursor: {}}), 6579000); + + // Upgrade FCV. + assert.commandWorked(adminDB.runCommand( + {setFeatureCompatibilityVersion: binVersionToFCV("latest"), confirm: true})); + + // We should be able to run a telemetry pipeline now that the FCV is correct. 
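+    // (The same $telemetry aggregate that failed with code 6579000 above should now succeed.)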
+ assert.commandWorked( + testDB.adminCommand({aggregate: 1, pipeline: [{$telemetry: {}}], cursor: {}}), + ); +} +testLower(true); +testLower(false); +MongoRunner.stopMongod(conn); +})(); diff --git a/jstests/noPassthroughWithMongod/telemetry_configuration.js b/jstests/noPassthroughWithMongod/telemetry_configuration.js index 370733a4480..0ae4e8408c3 100644 --- a/jstests/noPassthroughWithMongod/telemetry_configuration.js +++ b/jstests/noPassthroughWithMongod/telemetry_configuration.js @@ -22,13 +22,14 @@ if (FeatureFlagUtil.isEnabled(db, "Telemetry")) { } } testTelemetrySetting("internalQueryConfigureTelemetryCacheSize", "2MB"); - testTelemetrySetting("internalQueryStatsSamplingRate", 2147483647); + testTelemetrySetting("internalQueryConfigureTelemetrySamplingRate", 2147483647); } else { // The feature flag is disabled - make sure the telemetry store *cannot* be configured. assert.commandFailedWithCode( db.adminCommand({setParameter: 1, internalQueryConfigureTelemetryCacheSize: '2MB'}), 7373500); assert.commandFailedWithCode( - db.adminCommand({setParameter: 1, internalQueryStatsSamplingRate: 2147483647}), 7506200); + db.adminCommand({setParameter: 1, internalQueryConfigureTelemetrySamplingRate: 2147483647}), + 7506200); } }()); diff --git a/src/mongo/db/auth/action_type.idl b/src/mongo/db/auth/action_type.idl index 6837625e6f1..137ac3c9542 100644 --- a/src/mongo/db/auth/action_type.idl +++ b/src/mongo/db/auth/action_type.idl @@ -149,7 +149,7 @@ enums: planCacheIndexFilter : "planCacheIndexFilter" # view/update index filters planCacheRead : "planCacheRead" # view contents of plan cache planCacheWrite : "planCacheWrite" # clear cache, drop cache entry, pin/unpin/shun plans - queryStatsRead: "queryStatsRead" # view contents of queryStats store + telemetryRead: "telemetryRead" # view contents of telemetry store refineCollectionShardKey : "refineCollectionShardKey" reIndex : "reIndex" remove : "remove" diff --git a/src/mongo/db/auth/builtin_roles.yml b/src/mongo/db/auth/builtin_roles.yml index e384a959f1d..a29a476a91c 100644 --- a/src/mongo/db/auth/builtin_roles.yml +++ b/src/mongo/db/auth/builtin_roles.yml @@ -353,7 +353,7 @@ roles: - getClusterParameter - setChangeStreamState - getChangeStreamState - - queryStatsRead + - telemetryRead - checkMetadataConsistency - transitionFromDedicatedConfigServer - transitionToDedicatedConfigServer diff --git a/src/mongo/db/clientcursor.cpp b/src/mongo/db/clientcursor.cpp index 3b4f0143876..55e116e5893 100644 --- a/src/mongo/db/clientcursor.cpp +++ b/src/mongo/db/clientcursor.cpp @@ -48,7 +48,7 @@ #include "mongo/db/cursor_server_params.h" #include "mongo/db/jsobj.h" #include "mongo/db/query/explain.h" -#include "mongo/db/query/query_stats.h" +#include "mongo/db/query/telemetry.h" #include "mongo/db/repl/repl_client_info.h" #include "mongo/db/repl/replication_coordinator.h" #include "mongo/util/background.h" @@ -124,10 +124,10 @@ ClientCursor::ClientCursor(ClientCursorParams params, _planSummary(_exec->getPlanExplainer().getPlanSummary()), _planCacheKey(CurOp::get(operationUsingCursor)->debug().planCacheKey), _queryHash(CurOp::get(operationUsingCursor)->debug().queryHash), - _queryStatsStoreKeyHash(CurOp::get(operationUsingCursor)->debug().queryStatsStoreKeyHash), - _queryStatsStoreKey(CurOp::get(operationUsingCursor)->debug().queryStatsStoreKey), - _queryStatsRequestShapifier( - std::move(CurOp::get(operationUsingCursor)->debug().queryStatsRequestShapifier)), + _telemetryStoreKeyHash(CurOp::get(operationUsingCursor)->debug().telemetryStoreKeyHash), + 
_telemetryStoreKey(CurOp::get(operationUsingCursor)->debug().telemetryStoreKey), + _telemetryRequestShapifier( + std::move(CurOp::get(operationUsingCursor)->debug().telemetryRequestShapifier)), _shouldOmitDiagnosticInformation( CurOp::get(operationUsingCursor)->debug().shouldOmitDiagnosticInformation), _opKey(operationUsingCursor->getOperationKey()) { @@ -161,13 +161,13 @@ void ClientCursor::dispose(OperationContext* opCtx, boost::optional now) return; } - if (_queryStatsStoreKeyHash && opCtx) { - query_stats::writeQueryStats(opCtx, - _queryStatsStoreKeyHash, - _queryStatsStoreKey, - std::move(_queryStatsRequestShapifier), - _metrics.executionTime.value_or(Microseconds{0}).count(), - _metrics.nreturned.value_or(0)); + if (_telemetryStoreKeyHash && opCtx) { + telemetry::writeTelemetry(opCtx, + _telemetryStoreKeyHash, + _telemetryStoreKey, + std::move(_telemetryRequestShapifier), + _metrics.executionTime.value_or(Microseconds{0}).count(), + _metrics.nreturned.value_or(0)); } if (now) { @@ -397,19 +397,19 @@ void startClientCursorMonitor() { getClientCursorMonitor(getGlobalServiceContext()).go(); } -void collectQueryStatsMongod(OperationContext* opCtx, ClientCursorPin& pinnedCursor) { +void collectTelemetryMongod(OperationContext* opCtx, ClientCursorPin& pinnedCursor) { pinnedCursor->incrementCursorMetrics(CurOp::get(opCtx)->debug().additiveMetrics); } -void collectQueryStatsMongod(OperationContext* opCtx, - std::unique_ptr requestShapifier) { +void collectTelemetryMongod(OperationContext* opCtx, + std::unique_ptr requestShapifier) { // If we haven't registered a cursor to prepare for getMore requests, we record // telemetry directly. auto& opDebug = CurOp::get(opCtx)->debug(); - query_stats::writeQueryStats( + telemetry::writeTelemetry( opCtx, - opDebug.queryStatsStoreKeyHash, - opDebug.queryStatsStoreKey, + opDebug.telemetryStoreKeyHash, + opDebug.telemetryStoreKey, std::move(requestShapifier), opDebug.additiveMetrics.executionTime.value_or(Microseconds{0}).count(), opDebug.additiveMetrics.nreturned.value_or(0)); diff --git a/src/mongo/db/clientcursor.h b/src/mongo/db/clientcursor.h index 8ae75473496..9e7d35ade9a 100644 --- a/src/mongo/db/clientcursor.h +++ b/src/mongo/db/clientcursor.h @@ -448,15 +448,15 @@ private: boost::optional _queryHash; // If boost::none, telemetry should not be collected for this cursor. - boost::optional _queryStatsStoreKeyHash; + boost::optional _telemetryStoreKeyHash; // TODO: SERVER-73152 remove telemetryStoreKey when RequestShapifier is used for agg. - boost::optional _queryStatsStoreKey; + boost::optional _telemetryStoreKey; // Metrics that are accumulated over the lifetime of the cursor, incremented with each getMore. - // Useful for diagnostics like queryStats. + // Useful for diagnostics like telemetry. OpDebug::AdditiveMetrics _metrics; // The RequestShapifier used by telemetry to shapify the request payload into the telemetry // store key. - std::unique_ptr _queryStatsRequestShapifier; + std::unique_ptr _telemetryRequestShapifier; // Flag to decide if diagnostic information should be omitted. bool _shouldOmitDiagnosticInformation{false}; @@ -598,7 +598,7 @@ void startClientCursorMonitor(); * Currently, telemetry is only collected for find and aggregate requests (and their subsequent * getMore requests), so these should only be called from those request paths. 
*/ -void collectQueryStatsMongod(OperationContext* opCtx, ClientCursorPin& cursor); -void collectQueryStatsMongod(OperationContext* opCtx, - std::unique_ptr requestShapifier); +void collectTelemetryMongod(OperationContext* opCtx, ClientCursorPin& cursor); +void collectTelemetryMongod(OperationContext* opCtx, + std::unique_ptr requestShapifier); } // namespace mongo diff --git a/src/mongo/db/commands/find_cmd.cpp b/src/mongo/db/commands/find_cmd.cpp index 983661fbd15..90e6fa15ca1 100644 --- a/src/mongo/db/commands/find_cmd.cpp +++ b/src/mongo/db/commands/find_cmd.cpp @@ -55,7 +55,7 @@ #include "mongo/db/query/find_request_shapifier.h" #include "mongo/db/query/get_executor.h" #include "mongo/db/query/query_knobs_gen.h" -#include "mongo/db/query/query_stats.h" +#include "mongo/db/query/telemetry.h" #include "mongo/db/repl/replication_coordinator.h" #include "mongo/db/s/query_analysis_writer.h" #include "mongo/db/service_context.h" @@ -561,14 +561,13 @@ public: cq->setUseCqfIfEligible(true); if (collection) { - // Collect queryStats. Exclude queries against collections with encrypted fields. + // Collect telemetry. Exclude queries against collections with encrypted fields. if (!collection.get()->getCollectionOptions().encryptedFieldConfig) { - query_stats::registerRequest( - std::make_unique( - cq->getFindCommandRequest(), opCtx), - collection.get()->ns(), - opCtx, - cq->getExpCtx()); + telemetry::registerRequest(std::make_unique( + cq->getFindCommandRequest(), opCtx), + collection.get()->ns(), + opCtx, + cq->getExpCtx()); } } @@ -781,9 +780,9 @@ public: processFLEFindD( opCtx, findCommand->getNamespaceOrUUID().nss().value(), findCommand.get()); } - // Set the queryStatsStoreKey to none so queryStats isn't collected when we've done - // a FLE rewrite. - CurOp::get(opCtx)->debug().queryStatsStoreKeyHash = boost::none; + // Set the telemetryStoreKey to none so telemetry isn't collected when we've done a + // FLE rewrite. + CurOp::get(opCtx)->debug().telemetryStoreKeyHash = boost::none; CurOp::get(opCtx)->debug().shouldOmitDiagnosticInformation = true; } diff --git a/src/mongo/db/commands/getmore_cmd.cpp b/src/mongo/db/commands/getmore_cmd.cpp index 54035c99829..8f91862d002 100644 --- a/src/mongo/db/commands/getmore_cmd.cpp +++ b/src/mongo/db/commands/getmore_cmd.cpp @@ -703,7 +703,7 @@ public: metricsCollector.incrementDocUnitsReturned(curOp->getNS(), docUnitsReturned); curOp->debug().additiveMetrics.nBatches = 1; curOp->setEndOfOpMetrics(numResults); - collectQueryStatsMongod(opCtx, cursorPin); + collectTelemetryMongod(opCtx, cursorPin); if (respondWithId) { cursorDeleter.dismiss(); diff --git a/src/mongo/db/commands/run_aggregate.cpp b/src/mongo/db/commands/run_aggregate.cpp index 0bc7d7f6415..a290ef56713 100644 --- a/src/mongo/db/commands/run_aggregate.cpp +++ b/src/mongo/db/commands/run_aggregate.cpp @@ -76,7 +76,7 @@ #include "mongo/db/query/query_feature_flags_gen.h" #include "mongo/db/query/query_knobs_gen.h" #include "mongo/db/query/query_planner_common.h" -#include "mongo/db/query/query_stats.h" +#include "mongo/db/query/telemetry.h" #include "mongo/db/read_concern.h" #include "mongo/db/repl/oplog.h" #include "mongo/db/repl/read_concern_args.h" @@ -836,11 +836,11 @@ Status runAggregate(OperationContext* opCtx, }; auto registerTelemetry = [&]() -> void { - // Register queryStats. Exclude queries against collections with encrypted fields. - // We still collect queryStats on collection-less aggregations. + // Register telemetry. Exclude queries against collections with encrypted fields. 
+ // We still collect telemetry on collection-less aggregations. if (!(ctx && ctx->getCollection() && ctx->getCollection()->getCollectionOptions().encryptedFieldConfig)) { - query_stats::registerAggRequest(request, opCtx); + telemetry::registerAggRequest(request, opCtx); } }; @@ -1051,9 +1051,9 @@ Status runAggregate(OperationContext* opCtx, request.getEncryptionInformation()->setCrudProcessed(true); } - // Set the queryStatsStoreKey to none so queryStats isn't collected when we've done a - // FLE rewrite. - CurOp::get(opCtx)->debug().queryStatsStoreKeyHash = boost::none; + // Set the telemetryStoreKey to none so telemetry isn't collected when we've done a FLE + // rewrite. + CurOp::get(opCtx)->debug().telemetryStoreKeyHash = boost::none; } pipeline->optimizePipeline(); @@ -1223,9 +1223,9 @@ Status runAggregate(OperationContext* opCtx, curOp->setEndOfOpMetrics(stats.nReturned); if (keepCursor) { - collectQueryStatsMongod(opCtx, pins[0]); + collectTelemetryMongod(opCtx, pins[0]); } else { - collectQueryStatsMongod(opCtx, std::move(curOp->debug().queryStatsRequestShapifier)); + collectTelemetryMongod(opCtx, std::move(curOp->debug().telemetryRequestShapifier)); } // For an optimized away pipeline, signal the cache that a query operation has completed. diff --git a/src/mongo/db/curop.h b/src/mongo/db/curop.h index 9f5c32b10d1..8851993b015 100644 --- a/src/mongo/db/curop.h +++ b/src/mongo/db/curop.h @@ -294,12 +294,12 @@ public: boost::optional queryHash; // The shape of the original query serialized with readConcern, application name, and namespace. // If boost::none, telemetry should not be collected for this operation. - boost::optional queryStatsStoreKeyHash; + boost::optional telemetryStoreKeyHash; // TODO: SERVER-73152 remove telemetryStoreKey when RequestShapifier is used for agg. - boost::optional queryStatsStoreKey; + boost::optional telemetryStoreKey; // The RequestShapifier used by telemetry to shapify the request payload into the telemetry // store key. - std::unique_ptr queryStatsRequestShapifier; + std::unique_ptr telemetryRequestShapifier; // The query framework that this operation used. Will be unknown for non query operations. PlanExecutor::QueryFramework queryFramework{PlanExecutor::QueryFramework::kUnknown}; @@ -776,7 +776,7 @@ public: return computeElapsedTimeTotal(start, _end.load()) - _totalPausedDuration; } /** - * The planningTimeMicros metric, reported in the system profiler and in queryStats, is measured + * The planningTimeMicros metric, reported in the system profiler and in telemetry, is measured * using the Curop instance's _tickSource. Currently, _tickSource is only paused in places where logical work is being done. If this were to change, and _tickSource were to be paused during query planning for reasons unrelated to the work of diff --git a/src/mongo/db/cursor_manager.cpp b/src/mongo/db/cursor_manager.cpp index 34f7d7bdce0..ac9c41accfd 100644 --- a/src/mongo/db/cursor_manager.cpp +++ b/src/mongo/db/cursor_manager.cpp @@ -214,10 +214,10 @@ StatusWith CursorManager::pinCursor( CurOp::get(opCtx)->debug().queryHash = cursor->_queryHash; CurOp::get(opCtx)->debug().planCacheKey = cursor->_planCacheKey; - // Pass along queryStats context so it is retrievable after query execution for storing metrics. - CurOp::get(opCtx)->debug().queryStatsStoreKeyHash = cursor->_queryStatsStoreKeyHash; - // TODO: SERVER-73152 remove queryStatsStoreKey when RequestShapifier is used for agg. 
- CurOp::get(opCtx)->debug().queryStatsStoreKey = cursor->_queryStatsStoreKey; + // Pass along telemetry context so it is retrievable after query execution for storing metrics. + CurOp::get(opCtx)->debug().telemetryStoreKeyHash = cursor->_telemetryStoreKeyHash; + // TODO: SERVER-73152 remove telemetryStoreKey when RequestShapifier is used for agg. + CurOp::get(opCtx)->debug().telemetryStoreKey = cursor->_telemetryStoreKey; cursor->_operationUsingCursor = opCtx; diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript index eacb62bb6ea..72a1ab942b4 100644 --- a/src/mongo/db/pipeline/SConscript +++ b/src/mongo/db/pipeline/SConscript @@ -328,7 +328,7 @@ pipelineEnv.Library( 'document_source_sort_by_count.cpp', 'document_source_streaming_group.cpp', 'document_source_tee_consumer.cpp', - 'document_source_query_stats.cpp', + 'document_source_telemetry.cpp', 'document_source_union_with.cpp', 'document_source_unwind.cpp', 'group_from_first_document_transformation.cpp', @@ -634,7 +634,7 @@ env.CppUnitTest( 'document_source_skip_test.cpp', 'document_source_sort_by_count_test.cpp', 'document_source_sort_test.cpp', - 'document_source_query_stats_test.cpp', + 'document_source_telemetry_test.cpp', 'document_source_union_with_test.cpp', 'document_source_internal_compute_geo_near_distance_test.cpp', 'document_source_internal_convert_bucket_index_stats_test.cpp', diff --git a/src/mongo/db/pipeline/abt/document_source_visitor.cpp b/src/mongo/db/pipeline/abt/document_source_visitor.cpp index 9b7b27d3af0..2170ab14407 100644 --- a/src/mongo/db/pipeline/abt/document_source_visitor.cpp +++ b/src/mongo/db/pipeline/abt/document_source_visitor.cpp @@ -58,7 +58,6 @@ #include "mongo/db/pipeline/document_source_operation_metrics.h" #include "mongo/db/pipeline/document_source_out.h" #include "mongo/db/pipeline/document_source_plan_cache_stats.h" -#include "mongo/db/pipeline/document_source_query_stats.h" #include "mongo/db/pipeline/document_source_queue.h" #include "mongo/db/pipeline/document_source_redact.h" #include "mongo/db/pipeline/document_source_sample.h" @@ -68,6 +67,7 @@ #include "mongo/db/pipeline/document_source_skip.h" #include "mongo/db/pipeline/document_source_sort.h" #include "mongo/db/pipeline/document_source_tee_consumer.h" +#include "mongo/db/pipeline/document_source_telemetry.h" #include "mongo/db/pipeline/document_source_union_with.h" #include "mongo/db/pipeline/document_source_unwind.h" #include "mongo/db/pipeline/visitors/document_source_visitor_registry_mongod.h" diff --git a/src/mongo/db/pipeline/aggregate_command.idl b/src/mongo/db/pipeline/aggregate_command.idl index 476fc8dbb9d..b53ea540f8e 100644 --- a/src/mongo/db/pipeline/aggregate_command.idl +++ b/src/mongo/db/pipeline/aggregate_command.idl @@ -96,10 +96,10 @@ commands: - privilege: # $planCacheStats resource_pattern: exact_namespace action_type: planCacheRead - - privilege: # $queryStats - agg_stage: queryStats + - privilege: # $telemetry + agg_stage: telemetry resource_pattern: cluster - action_type: queryStatsRead + action_type: telemetryRead - privilege: # $changeStream resource_pattern: exact_namespace action_type: changeStream diff --git a/src/mongo/db/pipeline/aggregate_request_shapifier.cpp b/src/mongo/db/pipeline/aggregate_request_shapifier.cpp index 485b97e2c22..40ed6c2ce79 100644 --- a/src/mongo/db/pipeline/aggregate_request_shapifier.cpp +++ b/src/mongo/db/pipeline/aggregate_request_shapifier.cpp @@ -31,20 +31,20 @@ #include "mongo/db/query/query_shape.h" -namespace mongo::query_stats { +namespace 
mongo::telemetry { -BSONObj AggregateRequestShapifier::makeQueryStatsKey(const SerializationOptions& opts, - OperationContext* opCtx) const { +BSONObj AggregateRequestShapifier::makeTelemetryKey(const SerializationOptions& opts, + OperationContext* opCtx) const { // TODO SERVER-76087 We will likely want to set a flag here to stop $search from calling out // to mongot. auto expCtx = make_intrusive(opCtx, nullptr, _request.getNamespace()); expCtx->variables.setDefaultRuntimeConstants(opCtx); expCtx->maxFeatureCompatibilityVersion = boost::none; // Ensure all features are allowed. expCtx->stopExpressionCounters(); - return makeQueryStatsKey(opts, expCtx); + return makeTelemetryKey(opts, expCtx); } -BSONObj AggregateRequestShapifier::makeQueryStatsKey( +BSONObj AggregateRequestShapifier::makeTelemetryKey( const SerializationOptions& opts, const boost::intrusive_ptr& expCtx) const { BSONObjBuilder bob; @@ -84,4 +84,4 @@ BSONObj AggregateRequestShapifier::makeQueryStatsKey( return bob.obj(); } -} // namespace mongo::query_stats +} // namespace mongo::telemetry diff --git a/src/mongo/db/pipeline/aggregate_request_shapifier.h b/src/mongo/db/pipeline/aggregate_request_shapifier.h index d78dae31be7..3a0c41f8dd9 100644 --- a/src/mongo/db/pipeline/aggregate_request_shapifier.h +++ b/src/mongo/db/pipeline/aggregate_request_shapifier.h @@ -33,7 +33,7 @@ #include "mongo/db/pipeline/pipeline.h" #include "mongo/db/query/request_shapifier.h" -namespace mongo::query_stats { +namespace mongo::telemetry { /** * Handles shapification for AggregateCommandRequests. Requires a pre-parsed pipeline in order to @@ -50,14 +50,13 @@ public: virtual ~AggregateRequestShapifier() = default; - BSONObj makeQueryStatsKey(const SerializationOptions& opts, - OperationContext* opCtx) const final; + BSONObj makeTelemetryKey(const SerializationOptions& opts, OperationContext* opCtx) const final; - BSONObj makeQueryStatsKey(const SerializationOptions& opts, - const boost::intrusive_ptr& expCtx) const final; + BSONObj makeTelemetryKey(const SerializationOptions& opts, + const boost::intrusive_ptr& expCtx) const final; private: const AggregateCommandRequest& _request; const Pipeline& _pipeline; }; -} // namespace mongo::query_stats +} // namespace mongo::telemetry diff --git a/src/mongo/db/pipeline/document_source_query_stats.cpp b/src/mongo/db/pipeline/document_source_query_stats.cpp deleted file mode 100644 index 48f14e0ade6..00000000000 --- a/src/mongo/db/pipeline/document_source_query_stats.cpp +++ /dev/null @@ -1,215 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. 
You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#include "mongo/db/pipeline/document_source_query_stats.h" - -#include "mongo/bson/bsontypes.h" -#include "mongo/bson/timestamp.h" -#include "mongo/util/assert_util.h" -#include "mongo/util/debug_util.h" - -#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery - -namespace mongo { -namespace { -CounterMetric queryStatsHmacApplicationErrors("queryStats.numHmacApplicationErrors"); -} - -REGISTER_DOCUMENT_SOURCE_WITH_FEATURE_FLAG(queryStats, - DocumentSourceQueryStats::LiteParsed::parse, - DocumentSourceQueryStats::createFromBson, - AllowedWithApiStrict::kNeverInVersion1, - feature_flags::gFeatureFlagQueryStats); - -namespace { -/** - * Try to parse the applyHmacToIdentifiers property from the element. - */ -boost::optional parseApplyHmacToIdentifiers(const BSONElement& el) { - if (el.fieldNameStringData() == "applyHmacToIdentifiers"_sd) { - auto type = el.type(); - uassert(ErrorCodes::FailedToParse, - str::stream() << DocumentSourceQueryStats::kStageName - << " applyHmacToIdentifiers parameter must be boolean. Found type: " - << typeName(type), - type == BSONType::Bool); - return el.trueValue(); - } - return boost::none; -} - -/** - * Try to parse the `hmacKey' property from the element. - */ -boost::optional parseHmacKey(const BSONElement& el) { - if (el.fieldNameStringData() == "hmacKey"_sd) { - auto type = el.type(); - if (el.isBinData(BinDataType::BinDataGeneral)) { - int len; - auto data = el.binData(len); - uassert(ErrorCodes::FailedToParse, - str::stream() << DocumentSourceQueryStats::kStageName - << "hmacKey must be greater than or equal to 32 bytes", - len >= 32); - return {{data, (size_t)len}}; - } - uasserted(ErrorCodes::FailedToParse, - str::stream() - << DocumentSourceQueryStats::kStageName - << " hmacKey parameter must be bindata of length 32 or greater. Found type: " - << typeName(type)); - } - return boost::none; -} - -/** - * Parse the spec object calling the `ctor` with the bool applyHmacToIdentifiers and std::string - * hmacKey arguments. - */ -template -auto parseSpec(const BSONElement& spec, const Ctor& ctor) { - uassert(ErrorCodes::FailedToParse, - str::stream() << DocumentSourceQueryStats::kStageName - << " value must be an object. Found: " << typeName(spec.type()), - spec.type() == BSONType::Object); - - bool applyHmacToIdentifiers = false; - std::string hmacKey; - for (auto&& el : spec.embeddedObject()) { - if (auto maybeApplyHmacToIdentifiers = parseApplyHmacToIdentifiers(el); - maybeApplyHmacToIdentifiers) { - applyHmacToIdentifiers = *maybeApplyHmacToIdentifiers; - } else if (auto maybeHmacKey = parseHmacKey(el); maybeHmacKey) { - hmacKey = *maybeHmacKey; - } else { - uasserted(ErrorCodes::FailedToParse, - str::stream() - << DocumentSourceQueryStats::kStageName - << " parameters object may only contain 'applyHmacToIdentifiers' or " - "'hmacKey' options. 
Found: " - << el.fieldName()); - } - } - - return ctor(applyHmacToIdentifiers, hmacKey); -} - -} // namespace - -std::unique_ptr DocumentSourceQueryStats::LiteParsed::parse( - const NamespaceString& nss, const BSONElement& spec) { - return parseSpec(spec, [&](bool applyHmacToIdentifiers, std::string hmacKey) { - return std::make_unique( - spec.fieldName(), applyHmacToIdentifiers, hmacKey); - }); -} - -boost::intrusive_ptr DocumentSourceQueryStats::createFromBson( - BSONElement spec, const boost::intrusive_ptr& pExpCtx) { - const NamespaceString& nss = pExpCtx->ns; - - uassert(ErrorCodes::InvalidNamespace, - "$queryStats must be run against the 'admin' database with {aggregate: 1}", - nss.db() == DatabaseName::kAdmin.db() && nss.isCollectionlessAggregateNS()); - - return parseSpec(spec, [&](bool applyHmacToIdentifiers, std::string hmacKey) { - return new DocumentSourceQueryStats(pExpCtx, applyHmacToIdentifiers, hmacKey); - }); -} - -Value DocumentSourceQueryStats::serialize(SerializationOptions opts) const { - // This document source never contains any user information, so no need for any work when - // applying hmac. - return Value{Document{{kStageName, Document{}}}}; -} - -DocumentSource::GetNextResult DocumentSourceQueryStats::doGetNext() { - /** - * We maintain nested iterators: - * - Outer one over the set of partitions. - * - Inner one over the set of entries in a "materialized" partition. - * - * When an inner iterator is present and contains more elements, we can return the next element. - * When the inner iterator is exhausted, we move to the next element in the outer iterator and - * create a new inner iterator. When the outer iterator is exhausted, we have finished iterating - * over the queryStats store entries. - * - * The inner iterator iterates over a materialized container of all entries in the partition. - * This is done to reduce the time under which the partition lock is held. - */ - while (true) { - // First, attempt to exhaust all elements in the materialized partition. - if (!_materializedPartition.empty()) { - // Move out of the container reference. - auto doc = std::move(_materializedPartition.front()); - _materializedPartition.pop_front(); - return {std::move(doc)}; - } - - QueryStatsStore& _queryStatsStore = getQueryStatsStore(getContext()->opCtx); - - // Materialized partition is exhausted, move to the next. - _currentPartition++; - if (_currentPartition >= _queryStatsStore.numPartitions()) { - return DocumentSource::GetNextResult::makeEOF(); - } - - // We only keep the partition (which holds a lock) for the time needed to materialize it to - // a set of Document instances. - auto&& partition = _queryStatsStore.getPartition(_currentPartition); - - // Capture the time at which reading the partition begins to indicate to the caller - // when the snapshot began. 
- const auto partitionReadTime = - Timestamp{Timestamp(Date_t::now().toMillisSinceEpoch() / 1000, 0)}; - for (auto&& [key, metrics] : *partition) { - try { - auto queryStatsKey = metrics->computeQueryStatsKey( - pExpCtx->opCtx, _applyHmacToIdentifiers, _hmacKey); - _materializedPartition.push_back({{"key", std::move(queryStatsKey)}, - {"metrics", metrics->toBSON()}, - {"asOf", partitionReadTime}}); - } catch (const DBException& ex) { - queryStatsHmacApplicationErrors.increment(); - LOGV2_DEBUG(7349403, - 3, - "Error encountered when applying hmac to query shape, will not publish " - "queryStats for this entry.", - "status"_attr = ex.toStatus(), - "hash"_attr = key); - if (kDebugBuild) { - tasserted(7349401, - "Was not able to re-parse queryStats key when reading queryStats."); - } - } - } - } -} - -} // namespace mongo diff --git a/src/mongo/db/pipeline/document_source_query_stats.h b/src/mongo/db/pipeline/document_source_query_stats.h deleted file mode 100644 index 74d40583a6a..00000000000 --- a/src/mongo/db/pipeline/document_source_query_stats.h +++ /dev/null @@ -1,147 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. 
- */ - -#pragma once - -#include "mongo/db/pipeline/document_source.h" -#include "mongo/db/pipeline/lite_parsed_document_source.h" -#include "mongo/db/query/query_stats.h" -#include "mongo/util/producer_consumer_queue.h" - -namespace mongo { - -using namespace query_stats; - -class DocumentSourceQueryStats final : public DocumentSource { -public: - static constexpr StringData kStageName = "$queryStats"_sd; - - class LiteParsed final : public LiteParsedDocumentSource { - public: - static std::unique_ptr parse(const NamespaceString& nss, - const BSONElement& spec); - - LiteParsed(std::string parseTimeName, bool applyHmacToIdentifiers, std::string hmacKey) - : LiteParsedDocumentSource(std::move(parseTimeName)), - _applyHmacToIdentifiers(applyHmacToIdentifiers), - _hmacKey(hmacKey) {} - - stdx::unordered_set getInvolvedNamespaces() const override { - return stdx::unordered_set(); - } - - PrivilegeVector requiredPrivileges(bool isMongos, - bool bypassDocumentValidation) const override { - return {Privilege(ResourcePattern::forClusterResource(), ActionType::queryStatsRead)}; - ; - } - - bool allowedToPassthroughFromMongos() const final { - // $queryStats must be run locally on a mongod. - return false; - } - - bool isInitialSource() const final { - return true; - } - - void assertSupportsMultiDocumentTransaction() const { - transactionNotSupported(kStageName); - } - - bool _applyHmacToIdentifiers; - - std::string _hmacKey; - }; - - static boost::intrusive_ptr createFromBson( - BSONElement elem, const boost::intrusive_ptr& pExpCtx); - - virtual ~DocumentSourceQueryStats() = default; - - StageConstraints constraints( - Pipeline::SplitState = Pipeline::SplitState::kUnsplit) const override { - StageConstraints constraints{StreamType::kStreaming, - PositionRequirement::kFirst, - HostTypeRequirement::kLocalOnly, - DiskUseRequirement::kNoDiskUse, - FacetRequirement::kNotAllowed, - TransactionRequirement::kNotAllowed, - LookupRequirement::kNotAllowed, - UnionRequirement::kNotAllowed}; - - constraints.requiresInputDocSource = false; - constraints.isIndependentOfAnyCollection = true; - return constraints; - } - - boost::optional distributedPlanLogic() final { - return boost::none; - } - - const char* getSourceName() const override { - return kStageName.rawData(); - } - - Value serialize(SerializationOptions opts = SerializationOptions()) const final override; - - void addVariableRefs(std::set* refs) const final {} - -private: - DocumentSourceQueryStats(const boost::intrusive_ptr& expCtx, - bool applyHmacToIdentifiers = false, - std::string hmacKey = {}) - : DocumentSource(kStageName, expCtx), - _applyHmacToIdentifiers(applyHmacToIdentifiers), - _hmacKey(hmacKey) {} - - GetNextResult doGetNext() final; - - /** - * The current partition materialized as a set of Document instances. We pop from the queue and - * return DocumentSource results. - */ - std::deque _materializedPartition; - - /** - * Iterator over all queryStats partitions. This is incremented when we exhaust the current - * _materializedPartition. - */ - QueryStatsStore::PartitionId _currentPartition = -1; - - // When true, apply hmac to field names from returned query shapes. - bool _applyHmacToIdentifiers; - - /** - * Key used for SHA-256 HMAC application on field names. 
- */ - std::string _hmacKey; -}; - -} // namespace mongo diff --git a/src/mongo/db/pipeline/document_source_query_stats_test.cpp b/src/mongo/db/pipeline/document_source_query_stats_test.cpp deleted file mode 100644 index 7e29a44d591..00000000000 --- a/src/mongo/db/pipeline/document_source_query_stats_test.cpp +++ /dev/null @@ -1,95 +0,0 @@ -/** - * Copyright (C) 2022-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#include "mongo/platform/basic.h" - -#include "mongo/db/exec/document_value/document.h" -#include "mongo/db/exec/document_value/document_value_test_util.h" -#include "mongo/db/pipeline/aggregation_context_fixture.h" -#include "mongo/db/pipeline/document_source_query_stats.h" -#include "mongo/unittest/unittest.h" -#include "mongo/util/assert_util.h" -#include "mongo/util/str.h" - -namespace mongo { -namespace { - -/** - * Subclass AggregationContextFixture to set the ExpressionContext's namespace to 'admin' with - * {aggregate: 1} by default, so that parsing tests other than those which validate the namespace do - * not need to explicitly set it. 
- */ -class DocumentSourceQueryStatsTest : public AggregationContextFixture { -public: - DocumentSourceQueryStatsTest() - : AggregationContextFixture( - NamespaceString::makeCollectionlessAggregateNSS(DatabaseName::kAdmin)) {} -}; - -TEST_F(DocumentSourceQueryStatsTest, ShouldFailToParseIfSpecIsNotObject) { - ASSERT_THROWS_CODE(DocumentSourceQueryStats::createFromBson( - fromjson("{$queryStats: 1}").firstElement(), getExpCtx()), - AssertionException, - ErrorCodes::FailedToParse); -} - -TEST_F(DocumentSourceQueryStatsTest, ShouldFailToParseIfNotRunOnAdmin) { - getExpCtx()->ns = NamespaceString::makeCollectionlessAggregateNSS( - DatabaseName::createDatabaseName_forTest(boost::none, "foo")); - ASSERT_THROWS_CODE(DocumentSourceQueryStats::createFromBson( - fromjson("{$queryStats: {}}").firstElement(), getExpCtx()), - AssertionException, - ErrorCodes::InvalidNamespace); -} - -TEST_F(DocumentSourceQueryStatsTest, ShouldFailToParseIfNotRunWithAggregateOne) { - getExpCtx()->ns = NamespaceString::createNamespaceString_forTest("admin.foo"); - ASSERT_THROWS_CODE(DocumentSourceQueryStats::createFromBson( - fromjson("{$queryStats: {}}").firstElement(), getExpCtx()), - AssertionException, - ErrorCodes::InvalidNamespace); -} - -TEST_F(DocumentSourceQueryStatsTest, ShouldFailToParseIfUnrecognisedParameterSpecified) { - ASSERT_THROWS_CODE(DocumentSourceQueryStats::createFromBson( - fromjson("{$queryStats: {foo: true}}").firstElement(), getExpCtx()), - AssertionException, - ErrorCodes::FailedToParse); -} - -TEST_F(DocumentSourceQueryStatsTest, ParseAndSerialize) { - auto obj = fromjson("{$queryStats: {}}"); - auto doc = DocumentSourceQueryStats::createFromBson(obj.firstElement(), getExpCtx()); - auto queryStatsOp = static_cast(doc.get()); - auto expected = Document{{"$queryStats", Document{}}}; - ASSERT_DOCUMENT_EQ(queryStatsOp->serialize().getDocument(), expected); -} - -} // namespace -} // namespace mongo diff --git a/src/mongo/db/pipeline/document_source_telemetry.cpp b/src/mongo/db/pipeline/document_source_telemetry.cpp new file mode 100644 index 00000000000..b037515796f --- /dev/null +++ b/src/mongo/db/pipeline/document_source_telemetry.cpp @@ -0,0 +1,215 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. 
If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/pipeline/document_source_telemetry.h" + +#include "mongo/bson/bsontypes.h" +#include "mongo/bson/timestamp.h" +#include "mongo/util/assert_util.h" +#include "mongo/util/debug_util.h" + +#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery + +namespace mongo { +namespace { +CounterMetric telemetryHmacApplicationErrors("telemetry.numHmacApplicationErrors"); +} + +REGISTER_DOCUMENT_SOURCE_WITH_FEATURE_FLAG(telemetry, + DocumentSourceTelemetry::LiteParsed::parse, + DocumentSourceTelemetry::createFromBson, + AllowedWithApiStrict::kNeverInVersion1, + feature_flags::gFeatureFlagTelemetry); + +namespace { +/** + * Try to parse the applyHmacToIdentifiers property from the element. + */ +boost::optional parseApplyHmacToIdentifiers(const BSONElement& el) { + if (el.fieldNameStringData() == "applyHmacToIdentifiers"_sd) { + auto type = el.type(); + uassert(ErrorCodes::FailedToParse, + str::stream() << DocumentSourceTelemetry::kStageName + << " applyHmacToIdentifiers parameter must be boolean. Found type: " + << typeName(type), + type == BSONType::Bool); + return el.trueValue(); + } + return boost::none; +} + +/** + * Try to parse the `hmacKey' property from the element. + */ +boost::optional parseHmacKey(const BSONElement& el) { + if (el.fieldNameStringData() == "hmacKey"_sd) { + auto type = el.type(); + if (el.isBinData(BinDataType::BinDataGeneral)) { + int len; + auto data = el.binData(len); + uassert(ErrorCodes::FailedToParse, + str::stream() << DocumentSourceTelemetry::kStageName + << "hmacKey must be greater than or equal to 32 bytes", + len >= 32); + return {{data, (size_t)len}}; + } + uasserted(ErrorCodes::FailedToParse, + str::stream() + << DocumentSourceTelemetry::kStageName + << " hmacKey parameter must be bindata of length 32 or greater. Found type: " + << typeName(type)); + } + return boost::none; +} + +/** + * Parse the spec object calling the `ctor` with the bool applyHmacToIdentifiers and std::string + * hmacKey arguments. + */ +template +auto parseSpec(const BSONElement& spec, const Ctor& ctor) { + uassert(ErrorCodes::FailedToParse, + str::stream() << DocumentSourceTelemetry::kStageName + << " value must be an object. Found: " << typeName(spec.type()), + spec.type() == BSONType::Object); + + bool applyHmacToIdentifiers = false; + std::string hmacKey; + for (auto&& el : spec.embeddedObject()) { + if (auto maybeApplyHmacToIdentifiers = parseApplyHmacToIdentifiers(el); + maybeApplyHmacToIdentifiers) { + applyHmacToIdentifiers = *maybeApplyHmacToIdentifiers; + } else if (auto maybeHmacKey = parseHmacKey(el); maybeHmacKey) { + hmacKey = *maybeHmacKey; + } else { + uasserted(ErrorCodes::FailedToParse, + str::stream() + << DocumentSourceTelemetry::kStageName + << " parameters object may only contain 'applyHmacToIdentifiers' or " + "'hmacKey' options. 
Found: " + << el.fieldName()); + } + } + + return ctor(applyHmacToIdentifiers, hmacKey); +} + +} // namespace + +std::unique_ptr DocumentSourceTelemetry::LiteParsed::parse( + const NamespaceString& nss, const BSONElement& spec) { + return parseSpec(spec, [&](bool applyHmacToIdentifiers, std::string hmacKey) { + return std::make_unique( + spec.fieldName(), applyHmacToIdentifiers, hmacKey); + }); +} + +boost::intrusive_ptr DocumentSourceTelemetry::createFromBson( + BSONElement spec, const boost::intrusive_ptr& pExpCtx) { + const NamespaceString& nss = pExpCtx->ns; + + uassert(ErrorCodes::InvalidNamespace, + "$telemetry must be run against the 'admin' database with {aggregate: 1}", + nss.db() == DatabaseName::kAdmin.db() && nss.isCollectionlessAggregateNS()); + + return parseSpec(spec, [&](bool applyHmacToIdentifiers, std::string hmacKey) { + return new DocumentSourceTelemetry(pExpCtx, applyHmacToIdentifiers, hmacKey); + }); +} + +Value DocumentSourceTelemetry::serialize(SerializationOptions opts) const { + // This document source never contains any user information, so no need for any work when + // applying hmac. + return Value{Document{{kStageName, Document{}}}}; +} + +DocumentSource::GetNextResult DocumentSourceTelemetry::doGetNext() { + /** + * We maintain nested iterators: + * - Outer one over the set of partitions. + * - Inner one over the set of entries in a "materialized" partition. + * + * When an inner iterator is present and contains more elements, we can return the next element. + * When the inner iterator is exhausted, we move to the next element in the outer iterator and + * create a new inner iterator. When the outer iterator is exhausted, we have finished iterating + * over the telemetry store entries. + * + * The inner iterator iterates over a materialized container of all entries in the partition. + * This is done to reduce the time under which the partition lock is held. + */ + while (true) { + // First, attempt to exhaust all elements in the materialized partition. + if (!_materializedPartition.empty()) { + // Move out of the container reference. + auto doc = std::move(_materializedPartition.front()); + _materializedPartition.pop_front(); + return {std::move(doc)}; + } + + TelemetryStore& _telemetryStore = getTelemetryStore(getContext()->opCtx); + + // Materialized partition is exhausted, move to the next. + _currentPartition++; + if (_currentPartition >= _telemetryStore.numPartitions()) { + return DocumentSource::GetNextResult::makeEOF(); + } + + // We only keep the partition (which holds a lock) for the time needed to materialize it to + // a set of Document instances. + auto&& partition = _telemetryStore.getPartition(_currentPartition); + + // Capture the time at which reading the partition begins to indicate to the caller + // when the snapshot began. 
+ const auto partitionReadTime = + Timestamp{Timestamp(Date_t::now().toMillisSinceEpoch() / 1000, 0)}; + for (auto&& [key, metrics] : *partition) { + try { + auto telemetryKey = + metrics->computeTelemetryKey(pExpCtx->opCtx, _applyHmacToIdentifiers, _hmacKey); + _materializedPartition.push_back({{"key", std::move(telemetryKey)}, + {"metrics", metrics->toBSON()}, + {"asOf", partitionReadTime}}); + } catch (const DBException& ex) { + telemetryHmacApplicationErrors.increment(); + LOGV2_DEBUG(7349403, + 3, + "Error encountered when applying hmac to query shape, will not publish " + "telemetry for this entry.", + "status"_attr = ex.toStatus(), + "hash"_attr = key); + if (kDebugBuild) { + tasserted(7349401, + "Was not able to re-parse telemetry key when reading telemetry."); + } + } + } + } +} + +} // namespace mongo diff --git a/src/mongo/db/pipeline/document_source_telemetry.h b/src/mongo/db/pipeline/document_source_telemetry.h new file mode 100644 index 00000000000..c71bff210ac --- /dev/null +++ b/src/mongo/db/pipeline/document_source_telemetry.h @@ -0,0 +1,147 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */
+
+#pragma once
+
+#include "mongo/db/pipeline/document_source.h"
+#include "mongo/db/pipeline/lite_parsed_document_source.h"
+#include "mongo/db/query/telemetry.h"
+#include "mongo/util/producer_consumer_queue.h"
+
+namespace mongo {
+
+using namespace telemetry;
+
+class DocumentSourceTelemetry final : public DocumentSource {
+public:
+    static constexpr StringData kStageName = "$telemetry"_sd;
+
+    class LiteParsed final : public LiteParsedDocumentSource {
+    public:
+        static std::unique_ptr<LiteParsed> parse(const NamespaceString& nss,
+                                                 const BSONElement& spec);
+
+        LiteParsed(std::string parseTimeName, bool applyHmacToIdentifiers, std::string hmacKey)
+            : LiteParsedDocumentSource(std::move(parseTimeName)),
+              _applyHmacToIdentifiers(applyHmacToIdentifiers),
+              _hmacKey(hmacKey) {}
+
+        stdx::unordered_set<NamespaceString> getInvolvedNamespaces() const override {
+            return stdx::unordered_set<NamespaceString>();
+        }
+
+        PrivilegeVector requiredPrivileges(bool isMongos,
+                                           bool bypassDocumentValidation) const override {
+            return {Privilege(ResourcePattern::forClusterResource(), ActionType::telemetryRead)};
+        }
+
+        bool allowedToPassthroughFromMongos() const final {
+            // $telemetry must be run locally on a mongod.
+            return false;
+        }
+
+        bool isInitialSource() const final {
+            return true;
+        }
+
+        void assertSupportsMultiDocumentTransaction() const {
+            transactionNotSupported(kStageName);
+        }
+
+        bool _applyHmacToIdentifiers;
+
+        std::string _hmacKey;
+    };
+
+    static boost::intrusive_ptr<DocumentSource> createFromBson(
+        BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& pExpCtx);
+
+    virtual ~DocumentSourceTelemetry() = default;
+
+    StageConstraints constraints(
+        Pipeline::SplitState = Pipeline::SplitState::kUnsplit) const override {
+        StageConstraints constraints{StreamType::kStreaming,
+                                     PositionRequirement::kFirst,
+                                     HostTypeRequirement::kLocalOnly,
+                                     DiskUseRequirement::kNoDiskUse,
+                                     FacetRequirement::kNotAllowed,
+                                     TransactionRequirement::kNotAllowed,
+                                     LookupRequirement::kNotAllowed,
+                                     UnionRequirement::kNotAllowed};
+
+        constraints.requiresInputDocSource = false;
+        constraints.isIndependentOfAnyCollection = true;
+        return constraints;
+    }
+
+    boost::optional<DistributedPlanLogic> distributedPlanLogic() final {
+        return boost::none;
+    }
+
+    const char* getSourceName() const override {
+        return kStageName.rawData();
+    }
+
+    Value serialize(SerializationOptions opts = SerializationOptions()) const final override;
+
+    void addVariableRefs(std::set<Variables::Id>* refs) const final {}
+
+private:
+    DocumentSourceTelemetry(const boost::intrusive_ptr<ExpressionContext>& expCtx,
+                            bool applyHmacToIdentifiers = false,
+                            std::string hmacKey = {})
+        : DocumentSource(kStageName, expCtx),
+          _applyHmacToIdentifiers(applyHmacToIdentifiers),
+          _hmacKey(hmacKey) {}
+
+    GetNextResult doGetNext() final;
+
+    /**
+     * The current partition materialized as a set of Document instances. We pop from the queue and
+     * return DocumentSource results.
+     */
+    std::deque<Document> _materializedPartition;
+
+    /**
+     * Iterator over all telemetry partitions. This is incremented when we exhaust the current
+     * _materializedPartition.
+     */
+    TelemetryStore::PartitionId _currentPartition = -1;
+
+    // When true, apply hmac to field names from returned query shapes.
+    bool _applyHmacToIdentifiers;
+
+    /**
+     * Key used for SHA-256 HMAC application on field names.
+ */ + std::string _hmacKey; +}; + +} // namespace mongo diff --git a/src/mongo/db/pipeline/document_source_telemetry_test.cpp b/src/mongo/db/pipeline/document_source_telemetry_test.cpp new file mode 100644 index 00000000000..d08ce06b98c --- /dev/null +++ b/src/mongo/db/pipeline/document_source_telemetry_test.cpp @@ -0,0 +1,95 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/exec/document_value/document.h" +#include "mongo/db/exec/document_value/document_value_test_util.h" +#include "mongo/db/pipeline/aggregation_context_fixture.h" +#include "mongo/db/pipeline/document_source_telemetry.h" +#include "mongo/unittest/unittest.h" +#include "mongo/util/assert_util.h" +#include "mongo/util/str.h" + +namespace mongo { +namespace { + +/** + * Subclass AggregationContextFixture to set the ExpressionContext's namespace to 'admin' with + * {aggregate: 1} by default, so that parsing tests other than those which validate the namespace do + * not need to explicitly set it. 
+ */ +class DocumentSourceTelemetryTest : public AggregationContextFixture { +public: + DocumentSourceTelemetryTest() + : AggregationContextFixture( + NamespaceString::makeCollectionlessAggregateNSS(DatabaseName::kAdmin)) {} +}; + +TEST_F(DocumentSourceTelemetryTest, ShouldFailToParseIfSpecIsNotObject) { + ASSERT_THROWS_CODE(DocumentSourceTelemetry::createFromBson( + fromjson("{$telemetry: 1}").firstElement(), getExpCtx()), + AssertionException, + ErrorCodes::FailedToParse); +} + +TEST_F(DocumentSourceTelemetryTest, ShouldFailToParseIfNotRunOnAdmin) { + getExpCtx()->ns = NamespaceString::makeCollectionlessAggregateNSS( + DatabaseName::createDatabaseName_forTest(boost::none, "foo")); + ASSERT_THROWS_CODE(DocumentSourceTelemetry::createFromBson( + fromjson("{$telemetry: {}}").firstElement(), getExpCtx()), + AssertionException, + ErrorCodes::InvalidNamespace); +} + +TEST_F(DocumentSourceTelemetryTest, ShouldFailToParseIfNotRunWithAggregateOne) { + getExpCtx()->ns = NamespaceString::createNamespaceString_forTest("admin.foo"); + ASSERT_THROWS_CODE(DocumentSourceTelemetry::createFromBson( + fromjson("{$telemetry: {}}").firstElement(), getExpCtx()), + AssertionException, + ErrorCodes::InvalidNamespace); +} + +TEST_F(DocumentSourceTelemetryTest, ShouldFailToParseIfUnrecognisedParameterSpecified) { + ASSERT_THROWS_CODE(DocumentSourceTelemetry::createFromBson( + fromjson("{$telemetry: {foo: true}}").firstElement(), getExpCtx()), + AssertionException, + ErrorCodes::FailedToParse); +} + +TEST_F(DocumentSourceTelemetryTest, ParseAndSerialize) { + auto obj = fromjson("{$telemetry: {}}"); + auto doc = DocumentSourceTelemetry::createFromBson(obj.firstElement(), getExpCtx()); + auto telemetryOp = static_cast(doc.get()); + auto expected = Document{{"$telemetry", Document{}}}; + ASSERT_DOCUMENT_EQ(telemetryOp->serialize().getDocument(), expected); +} + +} // namespace +} // namespace mongo diff --git a/src/mongo/db/pipeline/visitors/document_source_visitor_registry_mongod.h b/src/mongo/db/pipeline/visitors/document_source_visitor_registry_mongod.h index 32ec042f6dc..24d11c814be 100644 --- a/src/mongo/db/pipeline/visitors/document_source_visitor_registry_mongod.h +++ b/src/mongo/db/pipeline/visitors/document_source_visitor_registry_mongod.h @@ -70,7 +70,6 @@ #include "mongo/db/pipeline/document_source_operation_metrics.h" #include "mongo/db/pipeline/document_source_out.h" #include "mongo/db/pipeline/document_source_plan_cache_stats.h" -#include "mongo/db/pipeline/document_source_query_stats.h" #include "mongo/db/pipeline/document_source_queue.h" #include "mongo/db/pipeline/document_source_redact.h" #include "mongo/db/pipeline/document_source_replace_root.h" @@ -84,6 +83,7 @@ #include "mongo/db/pipeline/document_source_sort.h" #include "mongo/db/pipeline/document_source_streaming_group.h" #include "mongo/db/pipeline/document_source_tee_consumer.h" +#include "mongo/db/pipeline/document_source_telemetry.h" #include "mongo/db/pipeline/document_source_union_with.h" #include "mongo/db/pipeline/document_source_unwind.h" #include "mongo/db/pipeline/visitors/document_source_visitor_registry.h" @@ -169,7 +169,7 @@ void registerMongodVisitor(ServiceContext* service) { DocumentSourceSort, DocumentSourceStreamingGroup, DocumentSourceTeeConsumer, - DocumentSourceQueryStats, + DocumentSourceTelemetry, DocumentSourceUnionWith, DocumentSourceUnwind>(®istry); } diff --git a/src/mongo/db/query/SConscript b/src/mongo/db/query/SConscript index 7f9e1c69a00..22e24674e1d 100644 --- a/src/mongo/db/query/SConscript +++ 
b/src/mongo/db/query/SConscript @@ -262,7 +262,7 @@ env.Library( 'query_feature_flags.idl', 'query_knobs.idl', 'sbe_plan_cache_on_parameter_change.cpp', - 'query_stats_util.cpp', + 'telemetry_util.cpp', ], LIBDEPS_PRIVATE=[ '$BUILD_DIR/mongo/db/commands/test_commands_enabled', @@ -366,7 +366,7 @@ env.Library( target='op_metrics', source=[ 'query_shape.cpp', - 'query_stats.cpp', + 'telemetry.cpp', ], LIBDEPS=[ '$BUILD_DIR/mongo/base', @@ -448,7 +448,6 @@ env.CppUnitTest( "query_settings_test.cpp", "query_shape_test.cpp", "query_shape_test.idl", - "query_stats_store_test.cpp", "query_solution_test.cpp", "rate_limiting_test.cpp", "sbe_and_hash_test.cpp", @@ -462,6 +461,7 @@ env.CppUnitTest( "sbe_stage_builder_type_checker_test.cpp", "shard_filterer_factory_mock.cpp", "sort_pattern_test.cpp", + "telemetry_store_test.cpp", "util/memory_util_test.cpp", "view_response_formatter_test.cpp", 'map_reduce_output_format_test.cpp', diff --git a/src/mongo/db/query/cqf_command_utils.cpp b/src/mongo/db/query/cqf_command_utils.cpp index 5db15a5ceee..a6279c43400 100644 --- a/src/mongo/db/query/cqf_command_utils.cpp +++ b/src/mongo/db/query/cqf_command_utils.cpp @@ -111,7 +111,6 @@ #include "mongo/db/pipeline/document_source_operation_metrics.h" #include "mongo/db/pipeline/document_source_out.h" #include "mongo/db/pipeline/document_source_plan_cache_stats.h" -#include "mongo/db/pipeline/document_source_query_stats.h" #include "mongo/db/pipeline/document_source_queue.h" #include "mongo/db/pipeline/document_source_redact.h" #include "mongo/db/pipeline/document_source_replace_root.h" @@ -125,6 +124,7 @@ #include "mongo/db/pipeline/document_source_sort.h" #include "mongo/db/pipeline/document_source_streaming_group.h" #include "mongo/db/pipeline/document_source_tee_consumer.h" +#include "mongo/db/pipeline/document_source_telemetry.h" #include "mongo/db/pipeline/document_source_union_with.h" #include "mongo/db/pipeline/document_source_unwind.h" #include "mongo/db/pipeline/visitors/document_source_visitor_registry_mongod.h" diff --git a/src/mongo/db/query/find.cpp b/src/mongo/db/query/find.cpp index dcd402e9c70..c54138afd4b 100644 --- a/src/mongo/db/query/find.cpp +++ b/src/mongo/db/query/find.cpp @@ -112,7 +112,7 @@ void endQueryOp(OperationContext* opCtx, auto curOp = CurOp::get(opCtx); // Fill out basic CurOp query exec properties. More metrics (nreturned and executionTime) - // are collected within collectQueryStatsMongod. + // are collected within collectTelemetryMongod. curOp->debug().cursorid = (cursor.has_value() ? 
cursor->getCursor()->cursorid() : -1); curOp->debug().cursorExhausted = !cursor.has_value(); curOp->debug().additiveMetrics.nBatches = 1; @@ -125,9 +125,9 @@ void endQueryOp(OperationContext* opCtx, curOp->setEndOfOpMetrics(numResults); if (cursor) { - collectQueryStatsMongod(opCtx, *cursor); + collectTelemetryMongod(opCtx, *cursor); } else { - collectQueryStatsMongod(opCtx, std::move(curOp->debug().queryStatsRequestShapifier)); + collectTelemetryMongod(opCtx, std::move(curOp->debug().telemetryRequestShapifier)); } if (collection) { diff --git a/src/mongo/db/query/find_request_shapifier.cpp b/src/mongo/db/query/find_request_shapifier.cpp index 83560f3acdb..8002a152a13 100644 --- a/src/mongo/db/query/find_request_shapifier.cpp +++ b/src/mongo/db/query/find_request_shapifier.cpp @@ -34,7 +34,7 @@ #include "mongo/db/query/query_request_helper.h" #include "mongo/db/query/query_shape.h" -namespace mongo::query_stats { +namespace mongo::telemetry { void addNonShapeObjCmdLiterals(BSONObjBuilder* bob, const FindCommandRequest& findCommand, @@ -58,8 +58,8 @@ void addNonShapeObjCmdLiterals(BSONObjBuilder* bob, } -BSONObj FindRequestShapifier::makeQueryStatsKey(const SerializationOptions& opts, - OperationContext* opCtx) const { +BSONObj FindRequestShapifier::makeTelemetryKey(const SerializationOptions& opts, + OperationContext* opCtx) const { auto expCtx = make_intrusive( opCtx, _request, nullptr /* collator doesn't matter here.*/, false /* mayDbProfile */); expCtx->maxFeatureCompatibilityVersion = boost::none; // Ensure all features are allowed. @@ -67,10 +67,10 @@ BSONObj FindRequestShapifier::makeQueryStatsKey(const SerializationOptions& opts // expressions/stages, so it's a side effect tied to parsing. We must stop expression counters // before re-parsing to avoid adding to the counters more than once per a given query. expCtx->stopExpressionCounters(); - return makeQueryStatsKey(opts, expCtx); + return makeTelemetryKey(opts, expCtx); } -BSONObj FindRequestShapifier::makeQueryStatsKey( +BSONObj FindRequestShapifier::makeTelemetryKey( const SerializationOptions& opts, const boost::intrusive_ptr& expCtx) const { BSONObjBuilder bob; @@ -102,4 +102,4 @@ BSONObj FindRequestShapifier::makeQueryStatsKey( return bob.obj(); } -} // namespace mongo::query_stats +} // namespace mongo::telemetry diff --git a/src/mongo/db/query/find_request_shapifier.h b/src/mongo/db/query/find_request_shapifier.h index 79f8223052a..b03f84eb1ab 100644 --- a/src/mongo/db/query/find_request_shapifier.h +++ b/src/mongo/db/query/find_request_shapifier.h @@ -32,7 +32,7 @@ #include "mongo/db/query/find_command_gen.h" #include "mongo/db/query/request_shapifier.h" -namespace mongo::query_stats { +namespace mongo::telemetry { /** * Handles shapification for FindCommandRequests. 
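Both request shapifiers implement the same small interface: given SerializationOptions, produce one key per query shape, building a throwaway ExpressionContext (with expression counters stopped, as the comment above notes) when the caller only has an OperationContext. A simplified stand-alone sketch of that contract, with toy types and std::string in place of BSONObj, shows why two requests differing only in literal values produce the same key:

// Sketch of the RequestShapifier contract; all types are stand-ins.
#include <iostream>
#include <string>

struct SerializationOptions {
    bool applyHmacToIdentifiers = false;
};

class RequestShapifier {
public:
    virtual ~RequestShapifier() = default;
    // The server returns BSONObj and also offers an ExpressionContext overload.
    virtual std::string makeTelemetryKey(const SerializationOptions& opts) const = 0;
};

class FindShapifierSketch : public RequestShapifier {
public:
    explicit FindShapifierSketch(std::string filterShape) : _shape(std::move(filterShape)) {}
    std::string makeTelemetryKey(const SerializationOptions&) const override {
        // Literals are already abstracted to "?" in the shape, so {a: 1} and
        // {a: 10} both serialize to the same key.
        return "{find: \"coll\", filter: " + _shape + "}";
    }

private:
    std::string _shape;
};

int main() {
    FindShapifierSketch s("{a: {$eq: \"?\"}}");
    std::cout << s.makeTelemetryKey({}) << '\n';
}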
@@ -49,13 +49,12 @@ public: virtual ~FindRequestShapifier() = default; - BSONObj makeQueryStatsKey(const SerializationOptions& opts, - OperationContext* opCtx) const final; + BSONObj makeTelemetryKey(const SerializationOptions& opts, OperationContext* opCtx) const final; - BSONObj makeQueryStatsKey(const SerializationOptions& opts, - const boost::intrusive_ptr& expCtx) const final; + BSONObj makeTelemetryKey(const SerializationOptions& opts, + const boost::intrusive_ptr& expCtx) const final; private: FindCommandRequest _request; }; -} // namespace mongo::query_stats +} // namespace mongo::telemetry diff --git a/src/mongo/db/query/query_feature_flags.idl b/src/mongo/db/query/query_feature_flags.idl index e18477beb9c..cbd970ca47d 100644 --- a/src/mongo/db/query/query_feature_flags.idl +++ b/src/mongo/db/query/query_feature_flags.idl @@ -90,9 +90,9 @@ feature_flags: default: false shouldBeFCVGated: true - featureFlagQueryStats: - description: "Feature flag for enabling the queryStats store." - cpp_varname: gFeatureFlagQueryStats + featureFlagTelemetry: + description: "Feature flag for enabling the telemetry store." + cpp_varname: gFeatureFlagTelemetry default: false shouldBeFCVGated: true diff --git a/src/mongo/db/query/query_knobs.idl b/src/mongo/db/query/query_knobs.idl index 4fc1e362524..d631ab42d3d 100644 --- a/src/mongo/db/query/query_knobs.idl +++ b/src/mongo/db/query/query_knobs.idl @@ -36,7 +36,7 @@ global: - "mongo/db/query/ce_mode_parameter.h" - "mongo/db/query/explain_version_validator.h" - "mongo/db/query/sbe_plan_cache_on_parameter_change.h" - - "mongo/db/query/query_stats_util.h" + - "mongo/db/query/telemetry_util.h" - "mongo/platform/atomic_proxy.h" - "mongo/platform/atomic_word.h" @@ -1018,32 +1018,32 @@ server_parameters: default: false test_only: true - internalQueryStatsSamplingRate: - description: "The maximum number of queries per second that are sampled for query stats. + internalQueryConfigureTelemetrySamplingRate: + description: "The maximum number of queries per second that are sampled for query telemetry. If the rate of queries goes above this number, then rate limiting will kick in, and any further queries will not be sampled. To sample all queries, this can be set to -1. This can be - set to 0 to turn queryStats off completely." + set to 0 to turn telemetry off completely." set_at: [ startup, runtime ] - cpp_varname: "queryQueryStatsSamplingRate" + cpp_varname: "queryTelemetrySamplingRate" cpp_vartype: AtomicWord default: 0 validator: gte: -1 - on_update: query_stats_util::onQueryStatsSamplingRateUpdate + on_update: telemetry_util::onTelemetrySamplingRateUpdate - internalQueryConfigureQueryStatsCacheSize: - description: "The maximum amount of memory that the system will allocate for the query queryStats + internalQueryConfigureTelemetryCacheSize: + description: "The maximum amount of memory that the system will allocate for the query telemetry cache. This will accept values in either of the following formats: 1. % indicates a percentage of the physical memory available to the process. E.g.: 15%. 2. (MB|GB), indicates the amount of memory in MB or GB. E.g.: 1.5GB, 100MB. The default value is 1%, which means 1% of the physical memory available to the process." 
     set_at: [ startup, runtime ]
-    cpp_varname: "queryQueryStatsStoreSize"
+    cpp_varname: "queryTelemetryStoreSize"
     cpp_vartype: synchronized_value<std::string>
     default: "1%"
-    on_update: query_stats_util::onQueryStatsStoreSizeUpdate
+    on_update: telemetry_util::onTelemetryStoreSizeUpdate
     validator:
-      callback: query_stats_util::validateQueryStatsStoreSize
+      callback: telemetry_util::validateTelemetryStoreSize
 
   internalQueryColumnScanMinCollectionSizeBytes:
     description: "The min collection size threshold for which column scan will always be allowed. If
@@ -1130,7 +1130,7 @@ server_parameters:
     default: 60000
     validator:
       gte: 0
-  
+
   internalQueryAggMulticastMaxConcurrency:
     description: "Max number of concurrent requests when aggregations are sent to all shard servers"
     set_at: startup
@@ -1173,8 +1173,8 @@ server_parameters:
       gte: 0
 
   internalQueryAutoParameterizationMaxParameterCount:
-    description: "The maximum numbers of parameters that query auto-parameterization can extract from a query. 
-    If auto parameterizating a query would result in a greater number of parameters than the limit, 
+    description: "The maximum number of parameters that query auto-parameterization can extract from a query.
+    If auto-parameterizing a query would result in a greater number of parameters than the limit,
     then auto parameterization will not be performed. If set to 0, then no limit will be applied."
     set_at: [ startup, runtime ]
diff --git a/src/mongo/db/query/query_shape.cpp b/src/mongo/db/query/query_shape.cpp
index 3f9ed7fbfb6..519b1115558 100644
--- a/src/mongo/db/query/query_shape.cpp
+++ b/src/mongo/db/query/query_shape.cpp
@@ -227,7 +227,7 @@ BSONObj extractQueryShape(const FindCommandRequest& findCommand,
                               expCtx,
                               ExtensionsCallbackNoop(),
                               MatchExpressionParser::kAllowAllSpecialFeatures),
-            "Failed to parse 'filter' option when making queryStats key");
+            "Failed to parse 'filter' option when making telemetry key");
         bob.append(FindCommandRequest::kFilterFieldName, filterExpr->serialize(opts));
     }
 
diff --git a/src/mongo/db/query/query_shape.h b/src/mongo/db/query/query_shape.h
index c0d4328d08b..0fa0d7c863e 100644
--- a/src/mongo/db/query/query_shape.h
+++ b/src/mongo/db/query/query_shape.h
@@ -40,7 +40,7 @@ constexpr StringData kLiteralArgString = "?"_sd;
 
 /**
  * Computes a BSONObj that is meant to be used to classify queries according to their shape, for the
- * purposes of collecting queryStats.
+ * purposes of collecting telemetry.
  *
  * For example, if the MatchExpression represents {a: 2}, it will return the same BSONObj as the
  * MatchExpression for {a: 1}, {a: 10}, and {a: {$eq: 2}} (identical bits but not sharing memory)
diff --git a/src/mongo/db/query/query_stats.cpp b/src/mongo/db/query/query_stats.cpp
deleted file mode 100644
index 6b99a43fc3f..00000000000
--- a/src/mongo/db/query/query_stats.cpp
+++ /dev/null
@@ -1,555 +0,0 @@
-/**
- * Copyright (C) 2022-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#include "mongo/db/query/query_stats.h" - -#include "mongo/crypto/hash_block.h" -#include "mongo/crypto/sha256_block.h" -#include "mongo/db/concurrency/d_concurrency.h" -#include "mongo/db/concurrency/locker.h" -#include "mongo/db/curop.h" -#include "mongo/db/exec/projection_executor_builder.h" -#include "mongo/db/namespace_string.h" -#include "mongo/db/pipeline/aggregate_command_gen.h" -#include "mongo/db/pipeline/process_interface/stub_mongo_process_interface.h" -#include "mongo/db/query/find_command_gen.h" -#include "mongo/db/query/plan_explainer.h" -#include "mongo/db/query/projection_ast_util.h" -#include "mongo/db/query/projection_parser.h" -#include "mongo/db/query/query_feature_flags_gen.h" -#include "mongo/db/query/query_planner_params.h" -#include "mongo/db/query/query_request_helper.h" -#include "mongo/db/query/query_stats_util.h" -#include "mongo/db/query/rate_limiting.h" -#include "mongo/db/query/serialization_options.h" -#include "mongo/db/query/sort_pattern.h" -#include "mongo/logv2/log.h" -#include "mongo/rpc/metadata/client_metadata.h" -#include "mongo/util/assert_util.h" -#include "mongo/util/debug_util.h" -#include "mongo/util/processinfo.h" -#include "mongo/util/system_clock_source.h" -#include "query_shape.h" -#include - -#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery - -namespace mongo { - -namespace query_stats { - -/** - * Redacts all BSONObj field names as if they were paths, unless the field name is a special hint - * operator. - */ -namespace { - -boost::optional getApplicationName(const OperationContext* opCtx) { - if (auto metadata = ClientMetadata::get(opCtx->getClient())) { - return metadata->getApplicationName().toString(); - } - return boost::none; -} -} // namespace - -CounterMetric queryStatsStoreSizeEstimateBytesMetric("queryStats.queryStatsStoreSizeEstimateBytes"); - -namespace { - -CounterMetric queryStatsEvictedMetric("queryStats.numEvicted"); -CounterMetric queryStatsRateLimitedRequestsMetric("queryStats.numRateLimitedRequests"); -CounterMetric queryStatsStoreWriteErrorsMetric("queryStats.numQueryStatsStoreWriteErrors"); - -/** - * Cap the queryStats store size. - */ -size_t capQueryStatsStoreSize(size_t requestedSize) { - size_t cappedStoreSize = memory_util::capMemorySize( - requestedSize /*requestedSizeBytes*/, 1 /*maximumSizeGB*/, 25 /*percentTotalSystemMemory*/); - // If capped size is less than requested size, the queryStats store has been capped at its - // upper limit. - if (cappedStoreSize < requestedSize) { - LOGV2_DEBUG(7106502, - 1, - "The queryStats store size has been capped", - "cappedSize"_attr = cappedStoreSize); - } - return cappedStoreSize; -} - -/** - * Get the queryStats store size based on the query job's value. 
- */ -size_t getQueryStatsStoreSize() { - auto status = memory_util::MemorySize::parse(queryQueryStatsStoreSize.get()); - uassertStatusOK(status); - size_t requestedSize = memory_util::convertToSizeInBytes(status.getValue()); - return capQueryStatsStoreSize(requestedSize); -} - -/** - * A manager for the queryStats store allows a "pointer swap" on the queryStats store itself. The - * usage patterns are as follows: - * - * - Updating the queryStats store uses the `getQueryStatsStore()` method. The queryStats store - * instance is obtained, entries are looked up and mutated, or created anew. - * - The queryStats store is "reset". This involves atomically allocating a new instance, once - * there are no more updaters (readers of the store "pointer"), and returning the existing - * instance. - */ -class QueryStatsStoreManager { -public: - template - QueryStatsStoreManager(size_t cacheSize, size_t numPartitions) - : _queryStatsStore(std::make_unique(cacheSize, numPartitions)), - _maxSize(cacheSize) {} - - /** - * Acquire the instance of the queryStats store. - */ - QueryStatsStore& getQueryStatsStore() { - return *_queryStatsStore; - } - - size_t getMaxSize() { - return _maxSize; - } - - /** - * Resize the queryStats store and return the number of evicted - * entries. - */ - size_t resetSize(size_t cacheSize) { - _maxSize = cacheSize; - return _queryStatsStore->reset(cacheSize); - } - -private: - std::unique_ptr _queryStatsStore; - - /** - * Max size of the queryStats store. Tracked here to avoid having to recompute after it's - * divided up into partitions. - */ - size_t _maxSize; -}; - -const auto queryStatsStoreDecoration = - ServiceContext::declareDecoration>(); - -const auto queryStatsRateLimiter = - ServiceContext::declareDecoration>(); - -class TelemetryOnParamChangeUpdaterImpl final : public query_stats_util::OnParamChangeUpdater { -public: - void updateCacheSize(ServiceContext* serviceCtx, memory_util::MemorySize memSize) final { - auto requestedSize = memory_util::convertToSizeInBytes(memSize); - auto cappedSize = capQueryStatsStoreSize(requestedSize); - auto& queryStatsStoreManager = queryStatsStoreDecoration(serviceCtx); - size_t numEvicted = queryStatsStoreManager->resetSize(cappedSize); - queryStatsEvictedMetric.increment(numEvicted); - } - - void updateSamplingRate(ServiceContext* serviceCtx, int samplingRate) { - queryStatsRateLimiter(serviceCtx).get()->setSamplingRate(samplingRate); - } -}; - -ServiceContext::ConstructorActionRegisterer queryStatsStoreManagerRegisterer{ - "QueryStatsStoreManagerRegisterer", [](ServiceContext* serviceCtx) { - // It is possible that this is called before FCV is properly set up. Setting up the store if - // the flag is enabled but FCV is incorrect is safe, and guards against the FCV being - // changed to a supported version later. - if (!feature_flags::gFeatureFlagQueryStats.isEnabledAndIgnoreFCVUnsafeAtStartup()) { - // featureFlags are not allowed to be changed at runtime. Therefore it's not an issue - // to not create a queryStats store in ConstructorActionRegisterer at start up with the - // flag off - because the flag can not be turned on at any point afterwards. 
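Concretely, capQueryStatsStoreSize() above bounds the configured size by the smaller of 1 GB and 25% of physical memory, per the capMemorySize arguments. A worked stand-alone illustration with plain integers (memory_util's parsing of "1%" and "1.5GB" strings is elided):

// Illustrative only: effective size = min(requested, min(1 GB, 25% of RAM)).
#include <algorithm>
#include <cstdint>
#include <iostream>

constexpr std::int64_t kGB = 1024LL * 1024 * 1024;

std::int64_t capStoreSize(std::int64_t requestedBytes, std::int64_t systemRamBytes) {
    const std::int64_t upperLimit = std::min(kGB, systemRamBytes / 4);  // 1 GB or 25%
    return std::min(requestedBytes, upperLimit);
}

int main() {
    const std::int64_t ram = 8 * kGB;                   // an 8 GB host
    std::cout << capStoreSize(ram / 100, ram) << '\n';  // "1%" default: ~86 MB, uncapped
    std::cout << capStoreSize(4 * kGB, ram) << '\n';    // capped to 1 GB (< 2 GB = 25%)
}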
-            query_stats_util::queryStatsStoreOnParamChangeUpdater(serviceCtx) =
-                std::make_unique<query_stats_util::NoChangesAllowedTelemetryParamUpdater>();
-            return;
-        }
-
-        query_stats_util::queryStatsStoreOnParamChangeUpdater(serviceCtx) =
-            std::make_unique<TelemetryOnParamChangeUpdaterImpl>();
-        size_t size = getQueryStatsStoreSize();
-        auto&& globalQueryStatsStoreManager = queryStatsStoreDecoration(serviceCtx);
-        // The plan cache and queryStats store should use the same number of partitions.
-        // That is, the number of cpu cores.
-        size_t numPartitions = ProcessInfo::getNumCores();
-        size_t partitionBytes = size / numPartitions;
-        size_t metricsSize = sizeof(QueryStatsEntry);
-        if (partitionBytes < metricsSize * 10) {
-            numPartitions = size / metricsSize;
-            if (numPartitions < 1) {
-                numPartitions = 1;
-            }
-        }
-        globalQueryStatsStoreManager =
-            std::make_unique<QueryStatsStoreManager>(size, numPartitions);
-        auto configuredSamplingRate = queryQueryStatsSamplingRate.load();
-        queryStatsRateLimiter(serviceCtx) = std::make_unique<RateLimiting>(
-            configuredSamplingRate < 0 ? INT_MAX : configuredSamplingRate);
-    }};
-
-/**
- * Top-level checks for whether queryStats collection is enabled. If this returns false, we must go
- * no further.
- */
-bool isQueryStatsEnabled(const ServiceContext* serviceCtx) {
-    // During initialization FCV may not yet be setup but queries could be run. We can't
-    // check whether queryStats should be enabled without FCV, so default to not recording
-    // those queries.
-    // TODO SERVER-75935 Remove FCV Check.
-    return feature_flags::gFeatureFlagQueryStats.isEnabled(
-               serverGlobalParams.featureCompatibility) &&
-        queryStatsStoreDecoration(serviceCtx)->getMaxSize() > 0;
-}
-
-/**
- * Internal check for whether we should collect metrics. This checks the rate limiting
- * configuration for a global on/off decision and, if enabled, delegates to the rate limiter.
- */
-bool shouldCollect(const ServiceContext* serviceCtx) {
-    // Quick escape if queryStats is turned off.
-    if (!isQueryStatsEnabled(serviceCtx)) {
-        return false;
-    }
-    // Cannot collect queryStats if sampling rate is not greater than 0. Note that we do not
-    // increment queryStatsRateLimitedRequestsMetric here since queryStats is entirely disabled.
-    if (queryStatsRateLimiter(serviceCtx)->getSamplingRate() <= 0) {
-        return false;
-    }
-    // Check if rate limiting allows us to collect queryStats for this request.
-    if (queryStatsRateLimiter(serviceCtx)->getSamplingRate() < INT_MAX &&
-        !queryStatsRateLimiter(serviceCtx)->handleRequestSlidingWindow()) {
-        queryStatsRateLimitedRequestsMetric.increment();
-        return false;
-    }
-    return true;
-}
-
-/**
- * Add a field to the find op's queryStats key. The `value` will have hmac applied.
- */
-void addToFindKey(BSONObjBuilder& builder, const StringData& fieldName, const BSONObj& value) {
-    serializeBSONWhenNotEmpty(value.redact(false), fieldName, &builder);
-}
-
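[Reviewer note] shouldCollect() treats INT_MAX as "collect everything" and otherwise defers to a sliding-window check. The RateLimiting class itself is not part of this patch; the following dependency-free sketch shows one way such a window can behave, assuming it admits at most samplingRate requests per one-second window (the real policy lives in rate_limiting.h):

    #include <chrono>
    #include <deque>
    #include <mutex>

    // Illustrative stand-in for RateLimiting::handleRequestSlidingWindow():
    // admit at most '_samplingRate' requests per one-second sliding window.
    class SlidingWindowLimiter {
    public:
        explicit SlidingWindowLimiter(int samplingRate) : _samplingRate(samplingRate) {}

        bool handleRequestSlidingWindow() {
            using namespace std::chrono;
            std::lock_guard<std::mutex> lk(_mutex);
            auto now = steady_clock::now();
            // Drop admissions that have aged out of the window.
            while (!_admitted.empty() && now - _admitted.front() > seconds(1)) {
                _admitted.pop_front();
            }
            if (static_cast<int>(_admitted.size()) >= _samplingRate) {
                return false;  // Rate limited; caller bumps numRateLimitedRequests.
            }
            _admitted.push_back(now);
            return true;
        }

    private:
        int _samplingRate;
        std::mutex _mutex;
        std::deque<std::chrono::steady_clock::time_point> _admitted;
    };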
-/**
- * Recognize FLE payloads in a query and throw an exception if found.
- */
-void throwIfEncounteringFLEPayload(const BSONElement& e) {
-    constexpr auto safeContentLabel = "__safeContent__"_sd;
-    constexpr auto fieldpath = "$__safeContent__"_sd;
-    if (e.type() == BSONType::Object) {
-        auto fieldname = e.fieldNameStringData();
-        uassert(ErrorCodes::EncounteredFLEPayloadWhileApplyingHmac,
-                "Encountered __safeContent__, or an $_internalFle operator, which indicate a "
-                "rewritten FLE2 query.",
-                fieldname != safeContentLabel && !fieldname.startsWith("$_internalFle"_sd));
-    } else if (e.type() == BSONType::String) {
-        auto val = e.valueStringData();
-        uassert(ErrorCodes::EncounteredFLEPayloadWhileApplyingHmac,
-                "Encountered $__safeContent__ fieldpath, which indicates a rewritten FLE2 query.",
-                val != fieldpath);
-    } else if (e.type() == BSONType::BinData && e.isBinData(BinDataType::Encrypt)) {
-        int len;
-        auto data = e.binData(len);
-        uassert(ErrorCodes::EncounteredFLEPayloadWhileApplyingHmac,
-                "FLE1 Payload encountered in expression.",
-                len > 1 && data[1] != char(EncryptedBinDataType::kDeterministic));
-    }
-}
-
-/**
- * Upon reading telemetry data, we apply hmac to some keys. This is the list. See
- * QueryStatsEntry::makeQueryStatsKey().
- */
-const stdx::unordered_set<std::string> kKeysToApplyHmac = {"pipeline", "find"};
-
-std::string sha256HmacStringDataHasher(std::string key, const StringData& sd) {
-    auto hashed = SHA256Block::computeHmac(
-        (const uint8_t*)key.data(), key.size(), (const uint8_t*)sd.rawData(), sd.size());
-    return hashed.toString();
-}
-
-std::string sha256HmacFieldNameHasher(std::string key, const BSONElement& e) {
-    auto&& fieldName = e.fieldNameStringData();
-    return sha256HmacStringDataHasher(key, fieldName);
-}
-
-std::string constantFieldNameHasher(const BSONElement& e) {
-    return {"###"};
-}
-
-/**
- * Admittedly an abuse of the BSON redaction interface, we recognize FLE payloads here and avoid
- * collecting queryStats for the query.
- */
-std::string fleSafeFieldNameRedactor(const BSONElement& e) {
-    throwIfEncounteringFLEPayload(e);
-    // Ideally we would change interface to avoid copying here.
-    return e.fieldNameStringData().toString();
-}
-
-/**
- * Append the element to the builder and apply hmac to any literals within the element. The element
- * may be of any type.
- */
-void appendWithAbstractedLiterals(BSONObjBuilder& builder, const BSONElement& el) {
-    if (el.type() == Object) {
-        builder.append(el.fieldNameStringData(), el.Obj().redact(false, fleSafeFieldNameRedactor));
-    } else if (el.type() == Array) {
-        BSONObjBuilder arrayBuilder = builder.subarrayStart(fleSafeFieldNameRedactor(el));
-        for (auto&& arrayElem : el.Obj()) {
-            appendWithAbstractedLiterals(arrayBuilder, arrayElem);
-        }
-        arrayBuilder.done();
-    } else {
-        auto fieldName = fleSafeFieldNameRedactor(el);
-        builder.append(fieldName, "###"_sd);
-    }
-}
-
-static const StringData replacementForLiteralArgs = "?"_sd;
-
-std::size_t hash(const BSONObj& obj) {
-    return absl::hash_internal::CityHash64(obj.objdata(), obj.objsize());
-}
-
-}  // namespace
-
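[Reviewer note] The point of the keyed hashers above is that identifier redaction stays deterministic: equal field names map to equal opaque tokens, so two operations with the same shape still compare equal after redaction. A stand-in sketch using std::hash in place of SHA256Block::computeHmac, purely to keep the example dependency-free:

    #include <functional>
    #include <iostream>
    #include <string>

    // Stand-in for sha256HmacStringDataHasher(): the real code computes an
    // HMAC-SHA256 under a server-held key. std::hash is NOT a substitute in
    // production; it only illustrates the determinism property here.
    std::string hmacFieldName(const std::string& key, const std::string& fieldName) {
        return std::to_string(std::hash<std::string>{}(key + ":" + fieldName));
    }

    int main() {
        std::cout << hmacFieldName("serverKey", "ssn") << "\n";   // same input ...
        std::cout << hmacFieldName("serverKey", "ssn") << "\n";   // ... same token
        std::cout << hmacFieldName("serverKey", "name") << "\n";  // different token
    }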
-BSONObj QueryStatsEntry::computeQueryStatsKey(OperationContext* opCtx,
-                                              bool applyHmacToIdentifiers,
-                                              std::string hmacKey) const {
-    // The telemetry key for find queries is generated by serializing all the command fields
-    // and applying hmac if SerializationOptions indicate to do so. The resulting key is of the
-    // form:
-    // {
-    //    queryShape: {
-    //        cmdNs: {db: "...", coll: "..."},
-    //        find: "...",
-    //        filter: {"...": {"$eq": "?number"}},
-    //    },
-    //    applicationName: kHashedApplicationName
-    // }
-    // queryShape may include additional fields, e.g. hint, limit, sort, etc., depending on the
-    // original query.
-
-    // TODO SERVER-73152 incorporate aggregation request into same path so that nullptr check is
-    // unnecessary
-    if (requestShapifier != nullptr) {
-        auto serializationOpts = applyHmacToIdentifiers
-            ? SerializationOptions(
-                  [&](StringData sd) { return sha256HmacStringDataHasher(hmacKey, sd); },
-                  LiteralSerializationPolicy::kToDebugTypeString)
-            : SerializationOptions(LiteralSerializationPolicy::kToDebugTypeString);
-        return requestShapifier->makeQueryStatsKey(serializationOpts, opCtx);
-    }
-
-    // TODO SERVER-73152 remove all special aggregation logic below
-    // The telemetry key for agg queries is of the following form:
-    //   { "agg": {...}, "namespace": "...", "applicationName": "...", ... }
-    //
-    // The part of the key we need to apply hmac to is the object in the <command type> element. In
-    // the case of an aggregate() command, it will look something like:
-    // > "pipeline" : [ { "$queryStats" : {} },
-    //                  { "$addFields" : { "x" : { "$someExpr" {} } } } ],
-    // We should preserve the top-level stage names in the pipeline but apply hmac to all field
-    // names of children.
-
-    // TODO: SERVER-73152 literal and field name redaction for aggregate command.
-    if (!applyHmacToIdentifiers) {
-        return oldQueryStatsKey;
-    }
-    BSONObjBuilder hmacAppliedBuilder;
-    for (BSONElement e : oldQueryStatsKey) {
-        if ((e.type() == Object || e.type() == Array) &&
-            kKeysToApplyHmac.count(e.fieldNameStringData().toString()) == 1) {
-            auto hmacApplicator = [&](BSONObjBuilder subObj, const BSONObj& obj) {
-                for (BSONElement e2 : obj) {
-                    if (e2.type() == Object) {
-                        subObj.append(e2.fieldNameStringData(),
-                                      e2.Obj().redact(false, [&](const BSONElement& e) {
-                                          return sha256HmacFieldNameHasher(hmacKey, e);
-                                      }));
-                    } else {
-                        subObj.append(e2);
-                    }
-                }
-                subObj.done();
-            };
-
-            // Now we're inside the <command type>:{} entry and want to preserve the top-level
-            // field names. If it's a [pipeline] array, we redact each element in isolation.
-            if (e.type() == Object) {
-                hmacApplicator(hmacAppliedBuilder.subobjStart(e.fieldNameStringData()), e.Obj());
-            } else {
-                BSONObjBuilder subArr = hmacAppliedBuilder.subarrayStart(e.fieldNameStringData());
-                for (BSONElement stage : e.Obj()) {
-                    hmacApplicator(subArr.subobjStart(""), stage.Obj());
-                }
-            }
-        } else {
-            hmacAppliedBuilder.append(e);
-        }
-    }
-    return hmacAppliedBuilder.obj();
-}
-
-// The originating command/query does not persist through the end of query execution. In order to
-// pair the queryStats metrics that are collected at the end of execution with the original query,
-// it is necessary to register the original query during planning and persist it after execution.
-
-// During planning, registerRequest is called to serialize the query shape and context (together,
-// the queryStats context) and save it to OpDebug. Moreover, as query execution may span more than
-// one request/operation and OpDebug does not persist through cursor iteration, it is necessary to
-// communicate the queryStats context across operations. In this way, the queryStats context is
-// registered to the cursor, so upon getMore() calls, the cursor manager passes the queryStats key
-// from the pinned cursor to the new OpDebug.
-
-// Once query execution is complete, the queryStats context is grabbed from OpDebug, a queryStats
-// key is generated from this and metrics are paired to this key in the queryStats store.
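[Reviewer note] A toy model of the register/write pairing the comment above describes. Only the key hash has to survive from planning to the end of execution; the metrics attach to it when the operation finishes. Names here are illustrative, not server APIs:

    #include <cstdint>
    #include <functional>
    #include <iostream>
    #include <string>
    #include <unordered_map>

    struct Metrics {
        uint64_t execCount = 0;
        uint64_t lastExecutionMicros = 0;
    };

    int main() {
        std::unordered_map<std::size_t, Metrics> store;

        // "registerRequest": hash the serialized shape, stash it on the operation.
        std::size_t keyHash =
            std::hash<std::string>{}(R"({find: "c", filter: {a: "?number"}})");

        // ... execution runs, possibly across several getMore() operations ...

        // "writeQueryStats": pair the measured metrics with the stashed hash.
        Metrics& m = store[keyHash];
        m.execCount++;
        m.lastExecutionMicros = 1532;

        std::cout << store[keyHash].execCount << "\n";  // prints 1
    }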
-void registerAggRequest(const AggregateCommandRequest& request, OperationContext* opCtx) {
-    if (!isQueryStatsEnabled(opCtx->getServiceContext())) {
-        return;
-    }
-
-    // Queries against metadata collections should never appear in queryStats data.
-    if (request.getNamespace().isFLE2StateCollection()) {
-        return;
-    }
-
-    if (!shouldCollect(opCtx->getServiceContext())) {
-        return;
-    }
-
-    BSONObjBuilder queryStatsKey;
-    BSONObjBuilder pipelineBuilder = queryStatsKey.subarrayStart("pipeline"_sd);
-    try {
-        for (auto&& stage : request.getPipeline()) {
-            BSONObjBuilder stageBuilder = pipelineBuilder.subobjStart("stage"_sd);
-            appendWithAbstractedLiterals(stageBuilder, stage.firstElement());
-            stageBuilder.done();
-        }
-        pipelineBuilder.done();
-        queryStatsKey.append("namespace", request.getNamespace().toString());
-        if (request.getReadConcern()) {
-            queryStatsKey.append("readConcern", *request.getReadConcern());
-        }
-        if (auto metadata = ClientMetadata::get(opCtx->getClient())) {
-            queryStatsKey.append("applicationName", metadata->getApplicationName());
-        }
-    } catch (ExceptionFor<ErrorCodes::EncounteredFLEPayloadWhileApplyingHmac>&) {
-        return;
-    }
-
-    BSONObj key = queryStatsKey.obj();
-    CurOp::get(opCtx)->debug().queryStatsStoreKeyHash = hash(key);
-    CurOp::get(opCtx)->debug().queryStatsStoreKey = key.getOwned();
-}
-
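[Reviewer note] For orientation, a hedged example of roughly what the legacy aggregate path above produces; the exact field layout of the built key is an assumption here, but literal values are folded to "###" by appendWithAbstractedLiterals() and the namespace and application name ride alongside the pipeline:

    // Illustrative only: approximate key shape registerAggRequest() would build
    // for db.coll.aggregate([{$match: {x: 5}}]) issued by an app named "app1".
    const char* kExampleAggQueryStatsKey = R"({
        "pipeline": [{"$match": {"x": "###"}}],
        "namespace": "test.coll",
        "applicationName": "app1"
    })";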
-void registerRequest(std::unique_ptr<RequestShapifier> requestShapifier,
-                     const NamespaceString& collection,
-                     OperationContext* opCtx,
-                     const boost::intrusive_ptr<ExpressionContext>& expCtx) {
-    if (!isQueryStatsEnabled(opCtx->getServiceContext())) {
-        return;
-    }
-
-    // Queries against metadata collections should never appear in queryStats data.
-    if (collection.isFLE2StateCollection()) {
-        return;
-    }
-
-    if (!shouldCollect(opCtx->getServiceContext())) {
-        return;
-    }
-    SerializationOptions options;
-    options.literalPolicy = LiteralSerializationPolicy::kToDebugTypeString;
-    options.replacementForLiteralArgs = replacementForLiteralArgs;
-    CurOp::get(opCtx)->debug().queryStatsStoreKeyHash =
-        hash(requestShapifier->makeQueryStatsKey(options, expCtx));
-    CurOp::get(opCtx)->debug().queryStatsRequestShapifier = std::move(requestShapifier);
-}
-
-QueryStatsStore& getQueryStatsStore(OperationContext* opCtx) {
-    uassert(6579000,
-            "Telemetry is not enabled without the feature flag on and a cache size greater than 0 "
-            "bytes",
-            isQueryStatsEnabled(opCtx->getServiceContext()));
-    return queryStatsStoreDecoration(opCtx->getServiceContext())->getQueryStatsStore();
-}
-
-void writeQueryStats(OperationContext* opCtx,
-                     boost::optional<std::size_t> queryStatsKeyHash,
-                     boost::optional<BSONObj> queryStatsKey,
-                     std::unique_ptr<RequestShapifier> requestShapifier,
-                     const uint64_t queryExecMicros,
-                     const uint64_t docsReturned) {
-    if (!queryStatsKeyHash) {
-        return;
-    }
-    auto&& queryStatsStore = getQueryStatsStore(opCtx);
-    auto&& [statusWithMetrics, partitionLock] =
-        queryStatsStore.getWithPartitionLock(*queryStatsKeyHash);
-    std::shared_ptr<QueryStatsEntry> metrics;
-    if (statusWithMetrics.isOK()) {
-        metrics = *statusWithMetrics.getValue();
-    } else {
-        BSONObj key = queryStatsKey.value_or(BSONObj{});
-        size_t numEvicted =
-            queryStatsStore.put(*queryStatsKeyHash,
-                                std::make_shared<QueryStatsEntry>(
-                                    std::move(requestShapifier), CurOp::get(opCtx)->getNSS(), key),
-                                partitionLock);
-        queryStatsEvictedMetric.increment(numEvicted);
-        auto newMetrics = partitionLock->get(*queryStatsKeyHash);
-        if (!newMetrics.isOK()) {
-            // This can happen if the budget is immediately exceeded. Specifically, if there is
-            // not enough room for even a single new entry because the number of partitions is too
-            // high relative to the size.
-            queryStatsStoreWriteErrorsMetric.increment();
-            LOGV2_DEBUG(7560900,
-                        1,
-                        "Failed to store queryStats entry.",
-                        "status"_attr = newMetrics.getStatus(),
-                        "queryStatsKeyHash"_attr = queryStatsKeyHash);
-            return;
-        }
-        metrics = newMetrics.getValue()->second;
-    }
-
-    metrics->lastExecutionMicros = queryExecMicros;
-    metrics->execCount++;
-    metrics->queryExecMicros.aggregate(queryExecMicros);
-    metrics->docsReturned.aggregate(docsReturned);
-}
-}  // namespace query_stats
-}  // namespace mongo
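[Reviewer note] The lookup and the fallback insert in writeQueryStats() happen under one partition lock; done separately, two racing writers could each insert a fresh entry and lose each other's counts. A minimal sketch of that get-or-create pattern (single map and mutex standing in for one partition of the real store):

    #include <cstdint>
    #include <memory>
    #include <mutex>
    #include <unordered_map>

    struct Entry {
        uint64_t execCount = 0;
    };

    std::mutex partitionMutex;  // one per partition in the real store
    std::unordered_map<std::size_t, std::shared_ptr<Entry>> partition;

    std::shared_ptr<Entry> getOrCreate(std::size_t keyHash) {
        std::lock_guard<std::mutex> lk(partitionMutex);
        auto it = partition.find(keyHash);
        if (it != partition.end()) {
            return it->second;
        }
        auto entry = std::make_shared<Entry>();
        partition.emplace(keyHash, entry);  // the real put() may evict and can fail
        return entry;
    }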
diff --git a/src/mongo/db/query/query_stats.h b/src/mongo/db/query/query_stats.h
deleted file mode 100644
index 59d79d6c114..00000000000
--- a/src/mongo/db/query/query_stats.h
+++ /dev/null
@@ -1,224 +0,0 @@
-/**
- * Copyright (C) 2022-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#pragma once
-
-#include "mongo/base/status.h"
-#include "mongo/bson/bsonobj.h"
-#include "mongo/db/concurrency/d_concurrency.h"
-#include "mongo/db/curop.h"
-#include "mongo/db/namespace_string.h"
-#include "mongo/db/query/partitioned_cache.h"
-#include "mongo/db/query/plan_explainer.h"
-#include "mongo/db/query/request_shapifier.h"
-#include "mongo/db/query/util/memory_util.h"
-#include "mongo/db/service_context.h"
-#include <cstdint>
-#include <memory>
-
-namespace mongo {
-
-class OpDebug;
-class AggregateCommandRequest;
-class FindCommandRequest;
-
-namespace {
-/**
- * Type we use to render values to BSON.
- */
-using BSONNumeric = long long;
-}  // namespace
-
-namespace query_stats {
-
-/**
- * An aggregated metric stores a compressed view of data. It balances the loss of information
- * with the reduction in required storage.
- */
-struct AggregatedMetric {
-
-    /**
-     * Aggregate an observed value into the metric.
-     */
-    void aggregate(uint64_t val) {
-        sum += val;
-        max = std::max(val, max);
-        min = std::min(val, min);
-        sumOfSquares += val * val;
-    }
-
-    void appendTo(BSONObjBuilder& builder, const StringData& fieldName) const {
-        BSONObjBuilder metricsBuilder = builder.subobjStart(fieldName);
-        metricsBuilder.append("sum", (BSONNumeric)sum);
-        metricsBuilder.append("max", (BSONNumeric)max);
-        metricsBuilder.append("min", (BSONNumeric)min);
-        metricsBuilder.append("sumOfSquares", (BSONNumeric)sumOfSquares);
-        metricsBuilder.done();
-    }
-
-    uint64_t sum = 0;
-    // Default to the _signed_ maximum (which fits in unsigned range) because we cast to
-    // BSONNumeric when serializing.
-    uint64_t min = (uint64_t)std::numeric_limits<int64_t>::max;
-    uint64_t max = 0;
-
-    /**
-     * The sum of squares along with (an externally stored) count will allow us to compute the
-     * variance/stddev.
-     */
-    uint64_t sumOfSquares = 0;
-};
-
-extern CounterMetric queryStatsStoreSizeEstimateBytesMetric;
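[Reviewer note] As the comment above says, sumOfSquares together with the externally stored execCount is enough to recover dispersion, via Var[X] = E[X^2] - E[X]^2. A sketch:

    #include <cmath>
    #include <cstdint>

    // Recover the (population) variance from the aggregates kept by
    // AggregatedMetric plus the entry's execCount.
    double varianceFromAggregates(uint64_t sum, uint64_t sumOfSquares, uint64_t count) {
        if (count == 0) {
            return 0.0;
        }
        const double mean = static_cast<double>(sum) / count;
        return static_cast<double>(sumOfSquares) / count - mean * mean;
    }
    // The standard deviation is then std::sqrt(varianceFromAggregates(...)).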
-// Used to aggregate the metrics for one telemetry key over all its executions.
-class QueryStatsEntry {
-public:
-    QueryStatsEntry(std::unique_ptr<RequestShapifier> requestShapifier,
-                    NamespaceStringOrUUID nss,
-                    const BSONObj& cmdObj)
-        : firstSeenTimestamp(Date_t::now().toMillisSinceEpoch() / 1000, 0),
-          requestShapifier(std::move(requestShapifier)),
-          nss(nss),
-          oldQueryStatsKey(cmdObj.copy()) {
-        queryStatsStoreSizeEstimateBytesMetric.increment(sizeof(QueryStatsEntry) + sizeof(BSONObj));
-    }
-
-    ~QueryStatsEntry() {
-        queryStatsStoreSizeEstimateBytesMetric.decrement(sizeof(QueryStatsEntry) + sizeof(BSONObj));
-    }
-
-    BSONObj toBSON() const {
-        BSONObjBuilder builder{sizeof(QueryStatsEntry) + 100};
-        builder.append("lastExecutionMicros", (BSONNumeric)lastExecutionMicros);
-        builder.append("execCount", (BSONNumeric)execCount);
-        queryExecMicros.appendTo(builder, "queryExecMicros");
-        docsReturned.appendTo(builder, "docsReturned");
-        builder.append("firstSeenTimestamp", firstSeenTimestamp);
-        return builder.obj();
-    }
-
-    /**
-     * Redact a given queryStats key and set _keySize.
-     */
-    BSONObj computeQueryStatsKey(OperationContext* opCtx,
-                                 bool applyHmacToIdentifiers,
-                                 std::string hmacKey) const;
-
-    /**
-     * Timestamp for when this query shape was added to the store. Set on construction.
-     */
-    const Timestamp firstSeenTimestamp;
-
-    /**
-     * Last execution time in microseconds.
-     */
-    uint64_t lastExecutionMicros = 0;
-
-    /**
-     * Number of query executions.
-     */
-    uint64_t execCount = 0;
-
-    AggregatedMetric queryExecMicros;
-
-    AggregatedMetric docsReturned;
-
-    std::unique_ptr<RequestShapifier> requestShapifier;
-
-    NamespaceStringOrUUID nss;
-
-    // TODO: SERVER-73152 remove oldQueryStatsKey when RequestShapifier is used for agg.
-    BSONObj oldQueryStatsKey;
-};
-
-struct TelemetryPartitioner {
-    // The partitioning function for use with the 'Partitioned' utility.
-    std::size_t operator()(const std::size_t k, const std::size_t nPartitions) const {
-        return k % nPartitions;
-    }
-};
-
-struct QueryStatsStoreEntryBudgetor {
-    size_t operator()(const std::size_t key, const std::shared_ptr<QueryStatsEntry>& value) {
-        // The budget estimator for the <key, value> pair in the LRU cache accounts for the size
-        // of the key and the size of the metrics, including the bson object used for generating
-        // the telemetry key at read time.
-        return sizeof(QueryStatsEntry) + sizeof(std::size_t) + value->oldQueryStatsKey.objsize();
-    }
-};
-
-using QueryStatsStore = PartitionedCache<std::size_t,
-                                         std::shared_ptr<QueryStatsEntry>,
-                                         QueryStatsStoreEntryBudgetor,
-                                         TelemetryPartitioner>;
-
-/**
- * Acquire a reference to the global queryStats store.
- */
-QueryStatsStore& getQueryStatsStore(OperationContext* opCtx);
-
-/**
- * Register a request for queryStats collection. The queryStats machinery may decide not to
- * collect anything but this should be called for all requests. The decision is made based on
- * the feature flag and queryStats parameters such as rate limiting.
- *
- * The caller is still responsible for subsequently calling writeQueryStats() once the request is
- * completed.
- *
- * Note that calling this affects internal state. It should be called once for each request for
- * which telemetry may be collected.
- * TODO SERVER-73152 remove request-specific registers, leave only registerRequest
- */
-void registerAggRequest(const AggregateCommandRequest& request, OperationContext* opCtx);
-void registerRequest(std::unique_ptr<RequestShapifier> requestShapifier,
-                     const NamespaceString& collection,
-                     OperationContext* opCtx,
-                     const boost::intrusive_ptr<ExpressionContext>& expCtx);
-
-/**
- * Writes queryStats to the queryStats store for the operation identified by `queryStatsKey`.
- */
-void writeQueryStats(OperationContext* opCtx,
-                     boost::optional<std::size_t> queryStatsKeyHash,
-                     boost::optional<BSONObj> queryStatsKey,
-                     std::unique_ptr<RequestShapifier> requestShapifier,
-                     uint64_t queryExecMicros,
-                     uint64_t docsReturned);
-
-/**
- * Serialize the FindCommandRequest according to the Options passed in. Returns the serialized BSON
- * with hmac applied to all field names and literals.
- */
-BSONObj makeQueryStatsKey(const FindCommandRequest& findCommand,
-                          const SerializationOptions& opts,
-                          const boost::intrusive_ptr<ExpressionContext>& expCtx,
-                          boost::optional<const QueryStatsEntry&> existingMetrics = boost::none);
-}  // namespace query_stats
-}  // namespace mongo
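[Reviewer note] Rough capacity math implied by the budgetor above: each cache entry is charged a fixed overhead plus the size of the retained BSON key, so a partition of B bytes holds about B / chargePerEntry entries before put() starts evicting. The byte counts below are placeholders, not real sizeof values; the EvictEntries test later in this patch exercises the same arithmetic:

    #include <cstddef>
    #include <iostream>

    int main() {
        const std::size_t partitionBytes = 1200;  // cacheSize / numPartitions
        const std::size_t entryOverhead = 250;    // ~sizeof(QueryStatsEntry) + sizeof(std::size_t)
        const std::size_t keyObjSize = 50;        // value->oldQueryStatsKey.objsize()
        std::cout << partitionBytes / (entryOverhead + keyObjSize)
                  << " entries per partition\n";
    }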
diff --git a/src/mongo/db/query/query_stats_store_test.cpp b/src/mongo/db/query/query_stats_store_test.cpp
deleted file mode 100644
index e36ac7ccd98..00000000000
--- a/src/mongo/db/query/query_stats_store_test.cpp
+++ /dev/null
@@ -1,1164 +0,0 @@
-/**
- * Copyright (C) 2022-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include "mongo/bson/simple_bsonobj_comparator.h"
-#include "mongo/db/catalog/rename_collection.h"
-#include "mongo/db/pipeline/aggregate_request_shapifier.h"
-#include "mongo/db/pipeline/expression_context_for_test.h"
-#include "mongo/db/query/find_request_shapifier.h"
-#include "mongo/db/query/query_feature_flags_gen.h"
-#include "mongo/db/query/query_stats.h"
-#include "mongo/db/service_context_test_fixture.h"
-#include "mongo/idl/server_parameter_test_util.h"
-#include "mongo/unittest/inline_auto_update.h"
-#include "mongo/unittest/unittest.h"
-
-namespace mongo::query_stats {
-/**
- * A default hmac application strategy that generates easy to check results for testing purposes.
- */
-std::string applyHmacForTest(StringData s) {
-    return str::stream() << "HASH<" << s << ">";
-}
-
-std::size_t hash(const BSONObj& obj) {
-    return absl::hash_internal::CityHash64(obj.objdata(), obj.objsize());
-}
-
-class QueryStatsStoreTest : public ServiceContextTest {
-public:
-    BSONObj makeQueryStatsKeyFindRequest(
-        FindCommandRequest fcr,
-        const boost::intrusive_ptr<ExpressionContext>& expCtx,
-        bool applyHmac = false,
-        LiteralSerializationPolicy literalPolicy = LiteralSerializationPolicy::kUnchanged) {
-        FindRequestShapifier findShapifier(fcr, expCtx->opCtx);
-
-        SerializationOptions opts;
-        if (literalPolicy != LiteralSerializationPolicy::kUnchanged) {
-            // TODO SERVER-75400 Use only 'literalPolicy.'
- opts.replacementForLiteralArgs = "?"; - opts.literalPolicy = literalPolicy; - } - - if (applyHmac) { - opts.applyHmacToIdentifiers = true; - opts.identifierHmacPolicy = applyHmacForTest; - } - return findShapifier.makeQueryStatsKey(opts, expCtx); - } -}; - -TEST_F(QueryStatsStoreTest, BasicUsage) { - QueryStatsStore telStore{5000000, 1000}; - - auto getMetrics = [&](const BSONObj& key) { - auto lookupResult = telStore.lookup(hash(key)); - return *lookupResult.getValue(); - }; - - auto collectMetrics = [&](BSONObj& key) { - std::shared_ptr metrics; - auto lookupResult = telStore.lookup(hash(key)); - if (!lookupResult.isOK()) { - telStore.put(hash(key), - std::make_shared(nullptr, NamespaceString{}, key)); - lookupResult = telStore.lookup(hash(key)); - } - metrics = *lookupResult.getValue(); - metrics->execCount += 1; - metrics->lastExecutionMicros += 123456; - }; - - auto query1 = BSON("query" << 1 << "xEquals" << 42); - // same value, different instance (tests hashing & equality) - auto query1x = BSON("query" << 1 << "xEquals" << 42); - auto query2 = BSON("query" << 2 << "yEquals" << 43); - - collectMetrics(query1); - collectMetrics(query1); - collectMetrics(query1x); - collectMetrics(query2); - - ASSERT_EQ(getMetrics(query1)->execCount, 3); - ASSERT_EQ(getMetrics(query1x)->execCount, 3); - ASSERT_EQ(getMetrics(query2)->execCount, 1); - - auto collectMetricsWithLock = [&](BSONObj& key) { - auto [lookupResult, lock] = telStore.getWithPartitionLock(hash(key)); - auto metrics = *lookupResult.getValue(); - metrics->execCount += 1; - metrics->lastExecutionMicros += 123456; - }; - - collectMetricsWithLock(query1x); - collectMetricsWithLock(query2); - - ASSERT_EQ(getMetrics(query1)->execCount, 4); - ASSERT_EQ(getMetrics(query1x)->execCount, 4); - ASSERT_EQ(getMetrics(query2)->execCount, 2); - - int numKeys = 0; - - telStore.forEach( - [&](std::size_t key, const std::shared_ptr& entry) { numKeys++; }); - - ASSERT_EQ(numKeys, 2); -} - - -TEST_F(QueryStatsStoreTest, EvictEntries) { - // This creates a queryStats store with 2 partitions, each with a size of 1200 bytes. - const auto cacheSize = 2400; - const auto numPartitions = 2; - QueryStatsStore telStore{cacheSize, numPartitions}; - - for (int i = 0; i < 20; i++) { - auto query = BSON("query" + std::to_string(i) << 1 << "xEquals" << 42); - telStore.put(hash(query), - std::make_shared(nullptr, NamespaceString{}, BSONObj{})); - } - int numKeys = 0; - telStore.forEach( - [&](std::size_t key, const std::shared_ptr& entry) { numKeys++; }); - - int entriesPerPartition = (cacheSize / numPartitions) / - (sizeof(std::size_t) + sizeof(QueryStatsEntry) + BSONObj().objsize()); - ASSERT_EQ(numKeys, entriesPerPartition * numPartitions); -} - -TEST_F(QueryStatsStoreTest, CorrectlyRedactsFindCommandRequestAllFields) { - auto expCtx = make_intrusive(); - FindCommandRequest fcr(NamespaceStringOrUUID(NamespaceString("testDB.testColl"))); - - fcr.setFilter(BSON("a" << 1)); - - auto key = makeQueryStatsKeyFindRequest( - fcr, expCtx, true, LiteralSerializationPolicy::kToDebugTypeString); - - ASSERT_BSONOBJ_EQ_AUTO( // NOLINT - R"({ - "queryShape": { - "cmdNs": { - "db": "HASH", - "coll": "HASH" - }, - "command": "find", - "filter": { - "HASH": { - "$eq": "?number" - } - } - } - })", - key); - - // Add sort. 
- fcr.setSort(BSON("sortVal" << 1 << "otherSort" << -1)); - key = makeQueryStatsKeyFindRequest( - fcr, expCtx, true, LiteralSerializationPolicy::kToDebugTypeString); - ASSERT_BSONOBJ_EQ_AUTO( // NOLINT - R"({ - "queryShape": { - "cmdNs": { - "db": "HASH", - "coll": "HASH" - }, - "command": "find", - "filter": { - "HASH": { - "$eq": "?number" - } - }, - "sort": { - "HASH": 1, - "HASH": -1 - } - } - })", - key); - - // Add inclusion projection. - fcr.setProjection(BSON("e" << true << "f" << true)); - key = makeQueryStatsKeyFindRequest( - fcr, expCtx, true, LiteralSerializationPolicy::kToDebugTypeString); - ASSERT_BSONOBJ_EQ_AUTO( // NOLINT - R"({ - "queryShape": { - "cmdNs": { - "db": "HASH", - "coll": "HASH" - }, - "command": "find", - "filter": { - "HASH": { - "$eq": "?number" - } - }, - "projection": { - "HASH": true, - "HASH": true, - "HASH<_id>": true - }, - "sort": { - "HASH": 1, - "HASH": -1 - } - } - })", - key); - - // Add let. - fcr.setLet(BSON("var1" - << "$a" - << "var2" - << "const1")); - key = makeQueryStatsKeyFindRequest( - fcr, expCtx, true, LiteralSerializationPolicy::kToDebugTypeString); - ASSERT_BSONOBJ_EQ_AUTO( // NOLINT - R"({ - "queryShape": { - "cmdNs": { - "db": "HASH", - "coll": "HASH" - }, - "command": "find", - "filter": { - "HASH": { - "$eq": "?number" - } - }, - "let": { - "HASH": "$HASH", - "HASH": "?string" - }, - "projection": { - "HASH": true, - "HASH": true, - "HASH<_id>": true - }, - "sort": { - "HASH": 1, - "HASH": -1 - } - } - })", - key); - - // Add hinting fields. - fcr.setHint(BSON("z" << 1 << "c" << 1)); - fcr.setMax(BSON("z" << 25)); - fcr.setMin(BSON("z" << 80)); - key = makeQueryStatsKeyFindRequest( - fcr, expCtx, true, LiteralSerializationPolicy::kToDebugTypeString); - ASSERT_BSONOBJ_EQ_AUTO( // NOLINT - R"({ - "queryShape": { - "cmdNs": { - "db": "HASH", - "coll": "HASH" - }, - "command": "find", - "filter": { - "HASH": { - "$eq": "?number" - } - }, - "let": { - "HASH": "$HASH", - "HASH": "?string" - }, - "projection": { - "HASH": true, - "HASH": true, - "HASH<_id>": true - }, - "hint": { - "HASH": 1, - "HASH": 1 - }, - "max": { - "HASH": "?" - }, - "min": { - "HASH": "?" - }, - "sort": { - "HASH": 1, - "HASH": -1 - } - } - })", - key); - - // Add the literal redaction fields. - fcr.setLimit(5); - fcr.setSkip(2); - fcr.setBatchSize(25); - fcr.setMaxTimeMS(1000); - fcr.setNoCursorTimeout(false); - - key = makeQueryStatsKeyFindRequest( - fcr, expCtx, true, LiteralSerializationPolicy::kToDebugTypeString); - - ASSERT_BSONOBJ_EQ_AUTO( // NOLINT - R"({ - "queryShape": { - "cmdNs": { - "db": "HASH", - "coll": "HASH" - }, - "command": "find", - "filter": { - "HASH": { - "$eq": "?number" - } - }, - "let": { - "HASH": "$HASH", - "HASH": "?string" - }, - "projection": { - "HASH": true, - "HASH": true, - "HASH<_id>": true - }, - "hint": { - "HASH": 1, - "HASH": 1 - }, - "max": { - "HASH": "?" - }, - "min": { - "HASH": "?" - }, - "sort": { - "HASH": 1, - "HASH": -1 - }, - "limit": "?number", - "skip": "?number" - }, - "maxTimeMS": "?number", - "batchSize": "?number" - } - )", - key); - - // Add the fields that shouldn't be hmacApplied. 
- fcr.setSingleBatch(true); - fcr.setAllowDiskUse(false); - fcr.setAllowPartialResults(true); - fcr.setAllowDiskUse(false); - fcr.setShowRecordId(true); - fcr.setAwaitData(false); - fcr.setMirrored(true); - key = makeQueryStatsKeyFindRequest( - fcr, expCtx, true, LiteralSerializationPolicy::kToDebugTypeString); - - ASSERT_BSONOBJ_EQ_AUTO( // NOLINT - R"({ - "queryShape": { - "cmdNs": { - "db": "HASH", - "coll": "HASH" - }, - "command": "find", - "filter": { - "HASH": { - "$eq": "?number" - } - }, - "let": { - "HASH": "$HASH", - "HASH": "?string" - }, - "projection": { - "HASH": true, - "HASH": true, - "HASH<_id>": true - }, - "hint": { - "HASH": 1, - "HASH": 1 - }, - "max": { - "HASH": "?" - }, - "min": { - "HASH": "?" - }, - "sort": { - "HASH": 1, - "HASH": -1 - }, - "limit": "?number", - "skip": "?number", - "singleBatch": "?bool", - "allowDiskUse": "?bool", - "showRecordId": "?bool", - "awaitData": "?bool", - "mirrored": "?bool" - }, - "allowPartialResults": true, - "maxTimeMS": "?number", - "batchSize": "?number" - })", - key); - - fcr.setAllowPartialResults(false); - key = makeQueryStatsKeyFindRequest( - fcr, expCtx, true, LiteralSerializationPolicy::kToDebugTypeString); - // Make sure that a false allowPartialResults is also accurately captured. - ASSERT_BSONOBJ_EQ_AUTO( // NOLINT - R"({ - "queryShape": { - "cmdNs": { - "db": "HASH", - "coll": "HASH" - }, - "command": "find", - "filter": { - "HASH": { - "$eq": "?number" - } - }, - "let": { - "HASH": "$HASH", - "HASH": "?string" - }, - "projection": { - "HASH": true, - "HASH": true, - "HASH<_id>": true - }, - "hint": { - "HASH": 1, - "HASH": 1 - }, - "max": { - "HASH": "?" - }, - "min": { - "HASH": "?" - }, - "sort": { - "HASH": 1, - "HASH": -1 - }, - "limit": "?number", - "skip": "?number", - "singleBatch": "?bool", - "allowDiskUse": "?bool", - "showRecordId": "?bool", - "awaitData": "?bool", - "mirrored": "?bool" - }, - "allowPartialResults": false, - "maxTimeMS": "?number", - "batchSize": "?number" - })", - key); -} - -TEST_F(QueryStatsStoreTest, CorrectlyRedactsFindCommandRequestEmptyFields) { - auto expCtx = make_intrusive(); - FindCommandRequest fcr(NamespaceStringOrUUID(NamespaceString("testDB.testColl"))); - FindRequestShapifier findShapifier(fcr, expCtx->opCtx); - fcr.setFilter(BSONObj()); - fcr.setSort(BSONObj()); - fcr.setProjection(BSONObj()); - SerializationOptions opts; - opts.literalPolicy = LiteralSerializationPolicy::kToDebugTypeString; - opts.applyHmacToIdentifiers = true; - opts.identifierHmacPolicy = applyHmacForTest; - - auto hmacApplied = findShapifier.makeQueryStatsKey(opts, expCtx); - ASSERT_BSONOBJ_EQ_AUTO( // NOLINT - R"({ - "queryShape": { - "cmdNs": { - "db": "HASH", - "coll": "HASH" - }, - "command": "find", - "filter": {} - } - })", - hmacApplied); // NOLINT (test auto-update) -} - -TEST_F(QueryStatsStoreTest, CorrectlyRedactsHintsWithOptions) { - auto expCtx = make_intrusive(); - FindCommandRequest fcr(NamespaceStringOrUUID(NamespaceString("testDB.testColl"))); - FindRequestShapifier findShapifier(fcr, expCtx->opCtx); - - fcr.setFilter(BSON("b" << 1)); - fcr.setHint(BSON("z" << 1 << "c" << 1)); - fcr.setMax(BSON("z" << 25)); - fcr.setMin(BSON("z" << 80)); - - auto key = makeQueryStatsKeyFindRequest( - fcr, expCtx, false, LiteralSerializationPolicy::kToDebugTypeString); - - ASSERT_BSONOBJ_EQ_AUTO( // NOLINT - R"({ - "queryShape": { - "cmdNs": { - "db": "testDB", - "coll": "testColl" - }, - "command": "find", - "filter": { - "b": { - "$eq": "?number" - } - }, - "hint": { - "z": 1, - "c": 1 - }, - "max": { 
- "z": "?" - }, - "min": { - "z": "?" - } - } - })", - key); - // Test with a string hint. Note that this is the internal representation of the string hint - // generated at parse time. - fcr.setHint(BSON("$hint" - << "z")); - - key = makeQueryStatsKeyFindRequest( - fcr, expCtx, false, LiteralSerializationPolicy::kToDebugTypeString); - ASSERT_BSONOBJ_EQ_AUTO( // NOLINT - R"({ - "queryShape": { - "cmdNs": { - "db": "testDB", - "coll": "testColl" - }, - "command": "find", - "filter": { - "b": { - "$eq": "?number" - } - }, - "hint": { - "$hint": "z" - }, - "max": { - "z": "?" - }, - "min": { - "z": "?" - } - } - })", - key); - - fcr.setHint(BSON("z" << 1 << "c" << 1)); - key = makeQueryStatsKeyFindRequest(fcr, expCtx, true, LiteralSerializationPolicy::kUnchanged); - ASSERT_BSONOBJ_EQ_AUTO( // NOLINT - R"({ - "queryShape": { - "cmdNs": { - "db": "HASH", - "coll": "HASH" - }, - "command": "find", - "filter": { - "HASH": { - "$eq": 1 - } - }, - "hint": { - "HASH": 1, - "HASH": 1 - }, - "max": { - "HASH": 25 - }, - "min": { - "HASH": 80 - } - } - })", - key); - - key = makeQueryStatsKeyFindRequest( - fcr, expCtx, true, LiteralSerializationPolicy::kToDebugTypeString); - ASSERT_BSONOBJ_EQ_AUTO( // NOLINT - R"({ - "queryShape": { - "cmdNs": { - "db": "HASH", - "coll": "HASH" - }, - "command": "find", - "filter": { - "HASH": { - "$eq": "?number" - } - }, - "hint": { - "HASH": 1, - "HASH": 1 - }, - "max": { - "HASH": "?" - }, - "min": { - "HASH": "?" - } - } - })", - key); - - // Test that $natural comes through unmodified. - fcr.setHint(BSON("$natural" << -1)); - key = makeQueryStatsKeyFindRequest( - fcr, expCtx, true, LiteralSerializationPolicy::kToDebugTypeString); - ASSERT_BSONOBJ_EQ_AUTO( // NOLINT - R"({ - "queryShape": { - "cmdNs": { - "db": "HASH", - "coll": "HASH" - }, - "command": "find", - "filter": { - "HASH": { - "$eq": "?number" - } - }, - "hint": { - "$natural": -1 - }, - "max": { - "HASH": "?" - }, - "min": { - "HASH": "?" - } - } - })", - key); -} - -TEST_F(QueryStatsStoreTest, DefinesLetVariables) { - // Test that the expression context we use to apply hmac will understand the 'let' part of the - // find command while parsing the other pieces of the command. - - // Note that this ExpressionContext will not have the let variables defined - we expect the - // 'makeQueryStatsKey' call to do that. - auto opCtx = makeOperationContext(); - FindCommandRequest fcr(NamespaceStringOrUUID(NamespaceString("testDB.testColl"))); - fcr.setLet(BSON("var" << 2)); - fcr.setFilter(fromjson("{$expr: [{$eq: ['$a', '$$var']}]}")); - fcr.setProjection(fromjson("{varIs: '$$var'}")); - - const auto cmdObj = fcr.toBSON(BSON("$db" - << "testDB")); - QueryStatsEntry testMetrics{ - std::make_unique(fcr, opCtx.get()), - fcr.getNamespaceOrUUID(), - cmdObj}; - - bool applyHmacToIdentifiers = false; - auto hmacApplied = - testMetrics.computeQueryStatsKey(opCtx.get(), applyHmacToIdentifiers, std::string{}); - ASSERT_BSONOBJ_EQ_AUTO( // NOLINT - R"({ - "queryShape": { - "cmdNs": { - "db": "testDB", - "coll": "testColl" - }, - "command": "find", - "filter": { - "$expr": [ - { - "$eq": [ - "$a", - "$$var" - ] - } - ] - }, - "let": { - "var": "?number" - }, - "projection": { - "varIs": "$$var", - "_id": true - } - } - })", - hmacApplied); - - // Now be sure hmac is applied to variable names. We don't currently expose a different way to - // do the hashing, so we'll just stick with the big long strings here for now. 
- applyHmacToIdentifiers = true; - hmacApplied = - testMetrics.computeQueryStatsKey(opCtx.get(), applyHmacToIdentifiers, std::string{}); - ASSERT_BSONOBJ_EQ_AUTO( // NOLINT - R"({ - "queryShape": { - "cmdNs": { - "db": "IyuPUD33jXD1td/VA/JyhbOPYY0MdGkXgdExniXmCyg=", - "coll": "QFhYnXorzWDLwH/wBgpXxp8fkfsZKo4n2cIN/O0uf/c=" - }, - "command": "find", - "filter": { - "$expr": [ - { - "$eq": [ - "$lhWpXUozYRjENbnNVMXoZEq5VrVzqikmJ0oSgLZnRxM=", - "$$adaJc6H3zDirh5/52MLv5yvnb6nXNP15Z4HzGfumvx8=" - ] - } - ] - }, - "let": { - "adaJc6H3zDirh5/52MLv5yvnb6nXNP15Z4HzGfumvx8=": "?number" - }, - "projection": { - "BL649QER7lTs0+8ozTMVNAa6JNjbhf57YT8YQ4EkT1E=": "$$adaJc6H3zDirh5/52MLv5yvnb6nXNP15Z4HzGfumvx8=", - "ljovqLSfuj6o2syO1SynOzHQK1YVij6+Wlx1fL8frUo=": true - } - } - })", - hmacApplied); -} - -TEST_F(QueryStatsStoreTest, CorrectlyRedactsAggregateCommandRequestAllFieldsSimplePipeline) { - auto expCtx = make_intrusive(); - AggregateCommandRequest acr(NamespaceString("testDB.testColl")); - auto matchStage = fromjson(R"({ - $match: { - foo: { $in: ["a", "b"] }, - bar: { $gte: { $date: "2022-01-01T00:00:00Z" } } - } - })"); - auto unwindStage = fromjson("{$unwind: '$x'}"); - auto groupStage = fromjson(R"({ - $group: { - _id: "$_id", - c: { $first: "$d.e" }, - f: { $sum: 1 } - } - })"); - auto limitStage = fromjson("{$limit: 10}"); - auto outStage = fromjson(R"({$out: 'outColl'})"); - auto rawPipeline = {matchStage, unwindStage, groupStage, limitStage, outStage}; - acr.setPipeline(rawPipeline); - auto pipeline = Pipeline::parse(rawPipeline, expCtx); - AggregateRequestShapifier aggShapifier(acr, *pipeline, expCtx->opCtx); - - SerializationOptions opts; - opts.literalPolicy = LiteralSerializationPolicy::kUnchanged; - opts.applyHmacToIdentifiers = false; - opts.identifierHmacPolicy = applyHmacForTest; - - auto shapified = aggShapifier.makeQueryStatsKey(opts, expCtx); - ASSERT_BSONOBJ_EQ_AUTO( // NOLINT - R"({ - "queryShape": { - "ns": { - "db": "testDB", - "coll": "testColl" - }, - "aggregate": "testColl", - "pipeline": [ - { - "$match": { - "foo": { - "$in": [ - "a", - "b" - ] - }, - "bar": { - "$gte": {"$date":"2022-01-01T00:00:00.000Z"} - } - } - }, - { - "$unwind": { - "path": "$x" - } - }, - { - "$group": { - "_id": "$_id", - "c": { - "$first": "$d.e" - }, - "f": { - "$sum": { - "$const": 1 - } - } - } - }, - { - "$limit": 10 - }, - { - "$out": { - "coll": "outColl", - "db": "test" - } - } - ] - } - })", - shapified); - - // TODO SERVER-75400 Use only 'literalPolicy.' - opts.replacementForLiteralArgs = "?"; - opts.literalPolicy = LiteralSerializationPolicy::kToDebugTypeString; - opts.applyHmacToIdentifiers = true; - shapified = aggShapifier.makeQueryStatsKey(opts, expCtx); - ASSERT_BSONOBJ_EQ_AUTO( // NOLINT - R"({ - "queryShape": { - "ns": { - "db": "HASH", - "coll": "HASH" - }, - "aggregate": "HASH", - "pipeline": [ - { - "$match": { - "$and": [ - { - "HASH": { - "$in": "?array" - } - }, - { - "HASH": { - "$gte": "?date" - } - } - ] - } - }, - { - "$unwind": { - "path": "$HASH" - } - }, - { - "$group": { - "_id": "$HASH<_id>", - "HASH": { - "$first": "$HASH.HASH" - }, - "HASH": { - "$sum": "?number" - } - } - }, - { - "$limit": "?" - }, - { - "$out": { - "coll": "HASH", - "db": "HASH" - } - } - ] - } - })", - shapified); - - // Add the fields that shouldn't be abstracted. 
- acr.setExplain(ExplainOptions::Verbosity::kExecStats); - acr.setAllowDiskUse(false); - acr.setHint(BSON("z" << 1 << "c" << 1)); - acr.setCollation(BSON("locale" - << "simple")); - shapified = aggShapifier.makeQueryStatsKey(opts, expCtx); - ASSERT_BSONOBJ_EQ_AUTO( // NOLINT - R"({ - "queryShape": { - "ns": { - "db": "HASH", - "coll": "HASH" - }, - "aggregate": "HASH", - "pipeline": [ - { - "$match": { - "$and": [ - { - "HASH": { - "$in": "?array" - } - }, - { - "HASH": { - "$gte": "?date" - } - } - ] - } - }, - { - "$unwind": { - "path": "$HASH" - } - }, - { - "$group": { - "_id": "$HASH<_id>", - "HASH": { - "$first": "$HASH.HASH" - }, - "HASH": { - "$sum": "?number" - } - } - }, - { - "$limit": "?" - }, - { - "$out": { - "coll": "HASH", - "db": "HASH" - } - } - ], - "explain": true, - "allowDiskUse": false, - "collation": { - "locale": "simple" - }, - "hint": { - "HASH": 1, - "HASH": 1 - } - } - })", - shapified); - - // Add let. - acr.setLet(BSON("var1" - << "$foo" - << "var2" - << "bar")); - shapified = aggShapifier.makeQueryStatsKey(opts, expCtx); - ASSERT_BSONOBJ_EQ_AUTO( // NOLINT - R"({ - "queryShape": { - "ns": { - "db": "HASH", - "coll": "HASH" - }, - "aggregate": "HASH", - "pipeline": [ - { - "$match": { - "$and": [ - { - "HASH": { - "$in": "?array" - } - }, - { - "HASH": { - "$gte": "?date" - } - } - ] - } - }, - { - "$unwind": { - "path": "$HASH" - } - }, - { - "$group": { - "_id": "$HASH<_id>", - "HASH": { - "$first": "$HASH.HASH" - }, - "HASH": { - "$sum": "?number" - } - } - }, - { - "$limit": "?" - }, - { - "$out": { - "coll": "HASH", - "db": "HASH" - } - } - ], - "explain": true, - "allowDiskUse": false, - "collation": { - "locale": "simple" - }, - "hint": { - "HASH": 1, - "HASH": 1 - }, - "let": { - "HASH": "$HASH", - "HASH": "?string" - } - } - })", - shapified); - - // Add the fields that should be abstracted. - auto cursorOptions = SimpleCursorOptions(); - cursorOptions.setBatchSize(10); - acr.setCursor(cursorOptions); - acr.setMaxTimeMS(500); - acr.setBypassDocumentValidation(true); - expCtx->opCtx->setComment(BSON("comment" - << "note to self")); - shapified = aggShapifier.makeQueryStatsKey(opts, expCtx); - ASSERT_BSONOBJ_EQ_AUTO( // NOLINT - R"({ - "queryShape": { - "ns": { - "db": "HASH", - "coll": "HASH" - }, - "aggregate": "HASH", - "pipeline": [ - { - "$match": { - "$and": [ - { - "HASH": { - "$in": "?array" - } - }, - { - "HASH": { - "$gte": "?date" - } - } - ] - } - }, - { - "$unwind": { - "path": "$HASH" - } - }, - { - "$group": { - "_id": "$HASH<_id>", - "HASH": { - "$first": "$HASH.HASH" - }, - "HASH": { - "$sum": "?number" - } - } - }, - { - "$limit": "?" - }, - { - "$out": { - "coll": "HASH", - "db": "HASH" - } - } - ], - "explain": true, - "allowDiskUse": false, - "collation": { - "locale": "simple" - }, - "hint": { - "HASH": 1, - "HASH": 1 - }, - "let": { - "HASH": "$HASH", - "HASH": "?string" - } - }, - "cursor": { - "batchSize": "?number" - }, - "maxTimeMS": "?number", - "bypassDocumentValidation": "?bool" - })", - shapified); -} -TEST_F(QueryStatsStoreTest, CorrectlyRedactsAggregateCommandRequestEmptyFields) { - auto expCtx = make_intrusive(); - AggregateCommandRequest acr(NamespaceString("testDB.testColl")); - acr.setPipeline({}); - auto pipeline = Pipeline::parse({}, expCtx); - AggregateRequestShapifier aggShapifier(acr, *pipeline, expCtx->opCtx); - - SerializationOptions opts; - // TODO SERVER-75400 Use only 'literalPolicy.' 
-    opts.replacementForLiteralArgs = "?";
-    opts.literalPolicy = LiteralSerializationPolicy::kToDebugTypeString;
-    opts.applyHmacToIdentifiers = true;
-    opts.identifierHmacPolicy = applyHmacForTest;
-
-    auto shapified = aggShapifier.makeQueryStatsKey(opts, expCtx);
-    ASSERT_BSONOBJ_EQ_AUTO(  // NOLINT
-        R"({
-            "queryShape": {
-                "ns": {
-                    "db": "HASH<testDB>",
-                    "coll": "HASH<testColl>"
-                },
-                "aggregate": "HASH<testColl>",
-                "pipeline": []
-            }
-        })",
-        shapified);  // NOLINT (test auto-update)
-}
-}  // namespace mongo::query_stats
diff --git a/src/mongo/db/query/query_stats_util.cpp b/src/mongo/db/query/query_stats_util.cpp
deleted file mode 100644
index 4c102d983dc..00000000000
--- a/src/mongo/db/query/query_stats_util.cpp
+++ /dev/null
@@ -1,97 +0,0 @@
-/**
- * Copyright (C) 2022-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include "mongo/db/query/query_stats_util.h"
-
-#include "mongo/base/status.h"
-#include "mongo/db/concurrency/d_concurrency.h"
-#include "mongo/db/query/partitioned_cache.h"
-#include "mongo/db/query/query_knobs_gen.h"
-#include "mongo/db/query/util/memory_util.h"
-#include "mongo/db/service_context.h"
-#include "mongo/logv2/log.h"
-
-
-#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery
-
-namespace mongo::query_stats_util {
-
-namespace {
-/**
- * Given the current 'Client', returns a pointer to the 'ServiceContext' and an interface for
- * updating the queryStats store.
- */
-std::pair<ServiceContext*, OnParamChangeUpdater*> getUpdater(const Client& client) {
-    auto serviceCtx = client.getServiceContext();
-    tassert(7106500, "ServiceContext must be non null", serviceCtx);
-
-    auto updater = queryStatsStoreOnParamChangeUpdater(serviceCtx).get();
-    tassert(7106501, "Telemetry store size updater must be non null", updater);
-    return {serviceCtx, updater};
-}
-}  // namespace
-
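[Reviewer note] The getUpdater() helper is the seam that keeps the knob layer decoupled: the setParameter callbacks below see only the abstract OnParamChangeUpdater, never the store. A simplified stand-in showing the same indirection (types here are illustrative, not the server's):

    #include <cstddef>
    #include <iostream>

    struct Updater {
        virtual ~Updater() = default;
        virtual void updateCacheSize(std::size_t newSizeBytes) = 0;
    };

    struct StoreBackedUpdater : Updater {
        void updateCacheSize(std::size_t newSizeBytes) override {
            std::cout << "resizing store to " << newSizeBytes << " bytes\n";
        }
    };

    int main() {
        StoreBackedUpdater impl;  // normally installed as a ServiceContext decoration
        Updater& iface = impl;    // the parameter callback sees only this interface
        iface.updateCacheSize(100 * 1024 * 1024);
    }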
-
-Status onQueryStatsStoreSizeUpdate(const std::string& str) {
-    auto newSize = memory_util::MemorySize::parse(str);
-    if (!newSize.isOK()) {
-        return newSize.getStatus();
-    }
-
-    // The client is nullptr if the parameter is supplied from the command line. In this case, we
-    // ignore the update event; the parameter will be processed when initializing the service
-    // context.
-    if (auto client = Client::getCurrent()) {
-        auto&& [serviceCtx, updater] = getUpdater(*client);
-        updater->updateCacheSize(serviceCtx, newSize.getValue());
-    }
-
-    return Status::OK();
-}
-
-Status validateQueryStatsStoreSize(const std::string& str, const boost::optional<TenantId>&) {
-    return memory_util::MemorySize::parse(str).getStatus();
-}
-
-Status onQueryStatsSamplingRateUpdate(int samplingRate) {
-    // The client is nullptr if the parameter is supplied from the command line. In this case, we
-    // ignore the update event; the parameter will be processed when initializing the service
-    // context.
-    if (auto client = Client::getCurrent()) {
-        auto&& [serviceCtx, updater] = getUpdater(*client);
-        updater->updateSamplingRate(serviceCtx, samplingRate < 0 ? INT_MAX : samplingRate);
-    }
-
-    return Status::OK();
-}
-
-const Decorable<ServiceContext>::Decoration<std::unique_ptr<OnParamChangeUpdater>>
-    queryStatsStoreOnParamChangeUpdater =
-        ServiceContext::declareDecoration<std::unique_ptr<OnParamChangeUpdater>>();
-}  // namespace mongo::query_stats_util
diff --git a/src/mongo/db/query/query_stats_util.h b/src/mongo/db/query/query_stats_util.h
deleted file mode 100644
index ebd8f1e2fbd..00000000000
--- a/src/mongo/db/query/query_stats_util.h
+++ /dev/null
@@ -1,93 +0,0 @@
-/**
- * Copyright (C) 2022-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#pragma once
-
-#include "mongo/base/status.h"
-#include "mongo/db/concurrency/d_concurrency.h"
-#include "mongo/db/query/partitioned_cache.h"
-#include "mongo/db/query/util/memory_util.h"
-
-
-namespace mongo::query_stats_util {
-
-Status onQueryStatsStoreSizeUpdate(const std::string& str);
-
-
-Status validateQueryStatsStoreSize(const std::string& str, const boost::optional<TenantId>&);
-
-Status onQueryStatsSamplingRateUpdate(int samplingRate);
-
-/**
- * An interface used to modify the queryStats store when query setParameters are modified. This is
- * done via an interface decorating the 'ServiceContext' in order to avoid a link-time dependency
- * of the query knobs library on the queryStats code.
- */
-class OnParamChangeUpdater {
-public:
-    virtual ~OnParamChangeUpdater() = default;
-
-    /**
-     * Resizes the queryStats store decorating 'serviceCtx' to the new size given by 'memSize'. If
-     * the new size is smaller than the old, cache entries are evicted in order to ensure the
-     * cache fits within the new size bound.
-     */
-    virtual void updateCacheSize(ServiceContext* serviceCtx, memory_util::MemorySize memSize) = 0;
-
-    /**
-     * Updates the sampling rate for the queryStats rate limiter.
-     */
-    virtual void updateSamplingRate(ServiceContext* serviceCtx, int samplingRate) = 0;
-};
-
-/**
- * A stub implementation that does not allow changing any parameters - to be used if the queryStats
- * store is disabled and cannot be re-enabled without restarting, as with a feature flag.
- */
-class NoChangesAllowedTelemetryParamUpdater : public OnParamChangeUpdater {
-public:
-    void updateCacheSize(ServiceContext* serviceCtx, memory_util::MemorySize memSize) final {
-        uasserted(7373500,
-                  "Cannot configure queryStats store - it is currently disabled and a restart is "
-                  "required to activate.");
-    }
-
-    void updateSamplingRate(ServiceContext* serviceCtx, int samplingRate) {
-        uasserted(7506200,
-                  "Cannot configure queryStats store - it is currently disabled and a restart is "
-                  "required to activate.");
-    }
-};
-
-/**
- * Decorated accessor to the 'OnParamChangeUpdater' stored in 'ServiceContext'.
- */
-extern const Decorable<ServiceContext>::Decoration<std::unique_ptr<OnParamChangeUpdater>>
    queryStatsStoreOnParamChangeUpdater;
-}  // namespace mongo::query_stats_util
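[Reviewer note] The stub class above is the null-object pattern in miniature: when the feature cannot be enabled without a restart, the decoration is filled with an updater that rejects changes rather than being left null, so callers never need a null check. A simplified stand-in (types are illustrative, not the server's):

    #include <cstddef>
    #include <stdexcept>

    struct Updater {
        virtual ~Updater() = default;
        virtual void updateCacheSize(std::size_t) = 0;
    };

    struct NoChangesAllowedUpdater final : Updater {
        void updateCacheSize(std::size_t) override {
            throw std::runtime_error(
                "Cannot configure queryStats store - a restart is required to activate.");
        }
    };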
diff --git a/src/mongo/db/query/request_shapifier.h b/src/mongo/db/query/request_shapifier.h
index 37004197fd0..1bae8f913f9 100644
--- a/src/mongo/db/query/request_shapifier.h
+++ b/src/mongo/db/query/request_shapifier.h
@@ -34,27 +34,27 @@
 #include "mongo/db/query/serialization_options.h"
 #include "mongo/rpc/metadata/client_metadata.h"
 
-namespace mongo::query_stats {
+namespace mongo::telemetry {
 
 /**
- * An abstract base class to handle query shapification for queryStats. Each request type should
- * define its own shapification strategy in its implementation of makeQueryStatsKey(), and then a
- * request should be registered with queryStats via query_stats::registerRequest(RequestShapifier).
+ * An abstract base class to handle query shapification for telemetry. Each request type should
+ * define its own shapification strategy in its implementation of makeTelemetryKey(), and then a
+ * request should be registered with telemetry via telemetry::registerRequest(RequestShapifier).
  */
 class RequestShapifier {
 public:
     virtual ~RequestShapifier() = default;
 
     /**
-     * makeQueryStatsKey generates the telemetry key representative of the specific request's
+     * makeTelemetryKey generates the telemetry key representative of the specific request's
      * payload. If there exists an ExpressionContext set up to parse and evaluate the request,
-     * makeQueryStatsKey should be called with that ExpressionContext. If not, you can call the
+     * makeTelemetryKey should be called with that ExpressionContext. If not, you can call the
      * overload that accepts the OperationContext and will construct a minimally-acceptable
      * ExpressionContext for the sake of generating the key.
      */
-    virtual BSONObj makeQueryStatsKey(const SerializationOptions& opts,
-                                      OperationContext* opCtx) const = 0;
-    virtual BSONObj makeQueryStatsKey(
+    virtual BSONObj makeTelemetryKey(const SerializationOptions& opts,
+                                     OperationContext* opCtx) const = 0;
+    virtual BSONObj makeTelemetryKey(
         const SerializationOptions& opts,
         const boost::intrusive_ptr<ExpressionContext>& expCtx) const = 0;
@@ -79,4 +79,4 @@ protected:
     BSONObj _commentObj;
     boost::optional<std::string> _comment = boost::none;
 };
-}  // namespace mongo::query_stats
+}  // namespace mongo::telemetry
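[Reviewer note] A hypothetical concrete shapifier, sketched only to show the contract of the interface above: own whatever request state is needed, then render the key on demand under the supplied options. PingRequest and makeKey are invented for illustration and are not server APIs:

    #include <string>
    #include <utility>

    struct SerializationOptions { /* elided */ };

    struct PingRequest {
        std::string target;
    };

    class PingRequestShapifier {
    public:
        explicit PingRequestShapifier(PingRequest request) : _request(std::move(request)) {}

        // Mirrors makeTelemetryKey(): serialize with literals abstracted, so all
        // pings share one shape regardless of target.
        std::string makeKey(const SerializationOptions&) const {
            return R"({ping: {target: "?string"}})";
        }

    private:
        PingRequest _request;
    };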
+ */ + +#include "mongo/db/query/telemetry.h" + +#include "mongo/crypto/hash_block.h" +#include "mongo/crypto/sha256_block.h" +#include "mongo/db/concurrency/d_concurrency.h" +#include "mongo/db/concurrency/locker.h" +#include "mongo/db/curop.h" +#include "mongo/db/exec/projection_executor_builder.h" +#include "mongo/db/namespace_string.h" +#include "mongo/db/pipeline/aggregate_command_gen.h" +#include "mongo/db/pipeline/process_interface/stub_mongo_process_interface.h" +#include "mongo/db/query/find_command_gen.h" +#include "mongo/db/query/plan_explainer.h" +#include "mongo/db/query/projection_ast_util.h" +#include "mongo/db/query/projection_parser.h" +#include "mongo/db/query/query_feature_flags_gen.h" +#include "mongo/db/query/query_planner_params.h" +#include "mongo/db/query/query_request_helper.h" +#include "mongo/db/query/rate_limiting.h" +#include "mongo/db/query/serialization_options.h" +#include "mongo/db/query/sort_pattern.h" +#include "mongo/db/query/telemetry_util.h" +#include "mongo/logv2/log.h" +#include "mongo/rpc/metadata/client_metadata.h" +#include "mongo/util/assert_util.h" +#include "mongo/util/debug_util.h" +#include "mongo/util/processinfo.h" +#include "mongo/util/system_clock_source.h" +#include "query_shape.h" +#include + +#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery + +namespace mongo { + +namespace telemetry { + +/** + * Redacts all BSONObj field names as if they were paths, unless the field name is a special hint + * operator. + */ +namespace { + +boost::optional getApplicationName(const OperationContext* opCtx) { + if (auto metadata = ClientMetadata::get(opCtx->getClient())) { + return metadata->getApplicationName().toString(); + } + return boost::none; +} +} // namespace + +CounterMetric telemetryStoreSizeEstimateBytesMetric("telemetry.telemetryStoreSizeEstimateBytes"); + +namespace { + +CounterMetric telemetryEvictedMetric("telemetry.numEvicted"); +CounterMetric telemetryRateLimitedRequestsMetric("telemetry.numRateLimitedRequests"); +CounterMetric telemetryStoreWriteErrorsMetric("telemetry.numTelemetryStoreWriteErrors"); + +/** + * Cap the telemetry store size. + */ +size_t capTelemetryStoreSize(size_t requestedSize) { + size_t cappedStoreSize = memory_util::capMemorySize( + requestedSize /*requestedSizeBytes*/, 1 /*maximumSizeGB*/, 25 /*percentTotalSystemMemory*/); + // If capped size is less than requested size, the telemetry store has been capped at its + // upper limit. + if (cappedStoreSize < requestedSize) { + LOGV2_DEBUG(7106502, + 1, + "The telemetry store size has been capped", + "cappedSize"_attr = cappedStoreSize); + } + return cappedStoreSize; +} + +/** + * Get the telemetry store size based on the query job's value. + */ +size_t getTelemetryStoreSize() { + auto status = memory_util::MemorySize::parse(queryTelemetryStoreSize.get()); + uassertStatusOK(status); + size_t requestedSize = memory_util::convertToSizeInBytes(status.getValue()); + return capTelemetryStoreSize(requestedSize); +} + +/** + * A manager for the telemetry store allows a "pointer swap" on the telemetry store itself. The + * usage patterns are as follows: + * + * - Updating the telemetry store uses the `getTelemetryStore()` method. The telemetry store + * instance is obtained, entries are looked up and mutated, or created anew. + * - The telemetry store is "reset". This involves atomically allocating a new instance, once + * there are no more updaters (readers of the store "pointer"), and returning the existing + * instance. 
+ */
+class TelemetryStoreManager {
+public:
+    template <typename... TelemetryStoreArgs>
+    TelemetryStoreManager(size_t cacheSize, size_t numPartitions)
+        : _telemetryStore(std::make_unique<TelemetryStore>(cacheSize, numPartitions)),
+          _maxSize(cacheSize) {}
+
+    /**
+     * Acquire the instance of the telemetry store.
+     */
+    TelemetryStore& getTelemetryStore() {
+        return *_telemetryStore;
+    }
+
+    size_t getMaxSize() {
+        return _maxSize;
+    }
+
+    /**
+     * Resize the telemetry store and return the number of evicted
+     * entries.
+     */
+    size_t resetSize(size_t cacheSize) {
+        _maxSize = cacheSize;
+        return _telemetryStore->reset(cacheSize);
+    }
+
+private:
+    std::unique_ptr<TelemetryStore> _telemetryStore;
+
+    /**
+     * Max size of the telemetry store. Tracked here to avoid having to recompute after it's divided
+     * up into partitions.
+     */
+    size_t _maxSize;
+};
+
+const auto telemetryStoreDecoration =
+    ServiceContext::declareDecoration<std::unique_ptr<TelemetryStoreManager>>();
+
+const auto telemetryRateLimiter =
+    ServiceContext::declareDecoration<std::unique_ptr<RateLimiting>>();
+
+class TelemetryOnParamChangeUpdaterImpl final : public telemetry_util::OnParamChangeUpdater {
+public:
+    void updateCacheSize(ServiceContext* serviceCtx, memory_util::MemorySize memSize) final {
+        auto requestedSize = memory_util::convertToSizeInBytes(memSize);
+        auto cappedSize = capTelemetryStoreSize(requestedSize);
+        auto& telemetryStoreManager = telemetryStoreDecoration(serviceCtx);
+        size_t numEvicted = telemetryStoreManager->resetSize(cappedSize);
+        telemetryEvictedMetric.increment(numEvicted);
+    }
+
+    void updateSamplingRate(ServiceContext* serviceCtx, int samplingRate) {
+        telemetryRateLimiter(serviceCtx).get()->setSamplingRate(samplingRate);
+    }
+};
+
+ServiceContext::ConstructorActionRegisterer telemetryStoreManagerRegisterer{
+    "TelemetryStoreManagerRegisterer", [](ServiceContext* serviceCtx) {
+        // It is possible that this is called before FCV is properly set up. Setting up the store if
+        // the flag is enabled but FCV is incorrect is safe, and guards against the FCV being
+        // changed to a supported version later.
+        if (!feature_flags::gFeatureFlagTelemetry.isEnabledAndIgnoreFCVUnsafeAtStartup()) {
+            // featureFlags are not allowed to be changed at runtime. Therefore it's not an issue
+            // to not create a telemetry store in ConstructorActionRegisterer at start up with the
+            // flag off - because the flag can not be turned on at any point afterwards.
+            telemetry_util::telemetryStoreOnParamChangeUpdater(serviceCtx) =
+                std::make_unique<telemetry_util::NoChangesAllowedTelemetryParamUpdater>();
+            return;
+        }
+
+        telemetry_util::telemetryStoreOnParamChangeUpdater(serviceCtx) =
+            std::make_unique<TelemetryOnParamChangeUpdaterImpl>();
+        size_t size = getTelemetryStoreSize();
+        auto&& globalTelemetryStoreManager = telemetryStoreDecoration(serviceCtx);
+        // The plan cache and telemetry store should use the same number of partitions.
+        // That is, the number of cpu cores.
+        size_t numPartitions = ProcessInfo::getNumCores();
+        size_t partitionBytes = size / numPartitions;
+        size_t metricsSize = sizeof(TelemetryEntry);
+        if (partitionBytes < metricsSize * 10) {
+            numPartitions = size / metricsSize;
+            if (numPartitions < 1) {
+                numPartitions = 1;
+            }
+        }
+        globalTelemetryStoreManager = std::make_unique<TelemetryStoreManager>(size, numPartitions);
+        auto configuredSamplingRate = queryTelemetrySamplingRate.load();
+        telemetryRateLimiter(serviceCtx) = std::make_unique<RateLimiting>(
+            configuredSamplingRate < 0 ? INT_MAX : configuredSamplingRate);
+    }};
+
+/**
+ * Top-level checks for whether telemetry collection is enabled. If this returns false, we must go
+ * no further.
+ */ +bool isTelemetryEnabled(const ServiceContext* serviceCtx) { + // During initialization FCV may not yet be setup but queries could be run. We can't + // check whether telemetry should be enabled without FCV, so default to not recording + // those queries. + // TODO SERVER-75935 Remove FCV Check. + return feature_flags::gFeatureFlagTelemetry.isEnabled( + serverGlobalParams.featureCompatibility) && + telemetryStoreDecoration(serviceCtx)->getMaxSize() > 0; +} + +/** + * Internal check for whether we should collect metrics. This checks the rate limiting + * configuration for a global on/off decision and, if enabled, delegates to the rate limiter. + */ +bool shouldCollect(const ServiceContext* serviceCtx) { + // Quick escape if telemetry is turned off. + if (!isTelemetryEnabled(serviceCtx)) { + return false; + } + // Cannot collect telemetry if sampling rate is not greater than 0. Note that we do not + // increment telemetryRateLimitedRequestsMetric here since telemetry is entirely disabled. + if (telemetryRateLimiter(serviceCtx)->getSamplingRate() <= 0) { + return false; + } + // Check if rate limiting allows us to collect telemetry for this request. + if (telemetryRateLimiter(serviceCtx)->getSamplingRate() < INT_MAX && + !telemetryRateLimiter(serviceCtx)->handleRequestSlidingWindow()) { + telemetryRateLimitedRequestsMetric.increment(); + return false; + } + return true; +} + +/** + * Add a field to the find op's telemetry key. The `value` will have hmac applied. + */ +void addToFindKey(BSONObjBuilder& builder, const StringData& fieldName, const BSONObj& value) { + serializeBSONWhenNotEmpty(value.redact(false), fieldName, &builder); +} + +/** + * Recognize FLE payloads in a query and throw an exception if found. + */ +void throwIfEncounteringFLEPayload(const BSONElement& e) { + constexpr auto safeContentLabel = "__safeContent__"_sd; + constexpr auto fieldpath = "$__safeContent__"_sd; + if (e.type() == BSONType::Object) { + auto fieldname = e.fieldNameStringData(); + uassert(ErrorCodes::EncounteredFLEPayloadWhileApplyingHmac, + "Encountered __safeContent__, or an $_internalFle operator, which indicate a " + "rewritten FLE2 query.", + fieldname != safeContentLabel && !fieldname.startsWith("$_internalFle"_sd)); + } else if (e.type() == BSONType::String) { + auto val = e.valueStringData(); + uassert(ErrorCodes::EncounteredFLEPayloadWhileApplyingHmac, + "Encountered $__safeContent__ fieldpath, which indicates a rewritten FLE2 query.", + val != fieldpath); + } else if (e.type() == BSONType::BinData && e.isBinData(BinDataType::Encrypt)) { + int len; + auto data = e.binData(len); + uassert(ErrorCodes::EncounteredFLEPayloadWhileApplyingHmac, + "FLE1 Payload encountered in expression.", + len > 1 && data[1] != char(EncryptedBinDataType::kDeterministic)); + } +} + +/** + * Upon reading telemetry data, we apply hmac to some keys. This is the list. See + * TelemetryEntry::makeTelemetryKey(). 
+ */
+const stdx::unordered_set<std::string> kKeysToApplyHmac = {"pipeline", "find"};
+
+std::string sha256HmacStringDataHasher(std::string key, const StringData& sd) {
+    auto hashed = SHA256Block::computeHmac(
+        (const uint8_t*)key.data(), key.size(), (const uint8_t*)sd.rawData(), sd.size());
+    return hashed.toString();
+}
+
+std::string sha256HmacFieldNameHasher(std::string key, const BSONElement& e) {
+    auto&& fieldName = e.fieldNameStringData();
+    return sha256HmacStringDataHasher(key, fieldName);
+}
+
+std::string constantFieldNameHasher(const BSONElement& e) {
+    return {"###"};
+}
+
+/**
+ * Admittedly an abuse of the BSON redaction interface, we recognize FLE payloads here and avoid
+ * collecting telemetry for the query.
+ */
+std::string fleSafeFieldNameRedactor(const BSONElement& e) {
+    throwIfEncounteringFLEPayload(e);
+    // Ideally we would change interface to avoid copying here.
+    return e.fieldNameStringData().toString();
+}
+
+/**
+ * Append the element to the builder and apply hmac to any literals within the element. The element
+ * may be of any type.
+ */
+void appendWithAbstractedLiterals(BSONObjBuilder& builder, const BSONElement& el) {
+    if (el.type() == Object) {
+        builder.append(el.fieldNameStringData(), el.Obj().redact(false, fleSafeFieldNameRedactor));
+    } else if (el.type() == Array) {
+        BSONObjBuilder arrayBuilder = builder.subarrayStart(fleSafeFieldNameRedactor(el));
+        for (auto&& arrayElem : el.Obj()) {
+            appendWithAbstractedLiterals(arrayBuilder, arrayElem);
+        }
+        arrayBuilder.done();
+    } else {
+        auto fieldName = fleSafeFieldNameRedactor(el);
+        builder.append(fieldName, "###"_sd);
+    }
+}
+
+static const StringData replacementForLiteralArgs = "?"_sd;
+
+std::size_t hash(const BSONObj& obj) {
+    return absl::hash_internal::CityHash64(obj.objdata(), obj.objsize());
+}
+
+} // namespace
+
+BSONObj TelemetryEntry::computeTelemetryKey(OperationContext* opCtx,
+                                            bool applyHmacToIdentifiers,
+                                            std::string hmacKey) const {
+    // The telemetry key for find queries is generated by serializing all the command fields
+    // and applying hmac if SerializationOptions indicate to do so. The resulting key is of the
+    // form:
+    // {
+    //    queryShape: {
+    //        cmdNs: {db: "...", coll: "..."},
+    //        find: "...",
+    //        filter: {"...": {"$eq": "?number"}},
+    //    },
+    //    applicationName: kHashedApplicationName
+    // }
+    // queryShape may include additional fields, e.g., hint, limit, sort, etc., depending on the
+    // original query.
+
+    // TODO SERVER-73152 incorporate aggregation request into same path so that nullptr check is
+    // unnecessary
+    if (requestShapifier != nullptr) {
+        auto serializationOpts = applyHmacToIdentifiers
+            ? SerializationOptions(
+                  [&](StringData sd) { return sha256HmacStringDataHasher(hmacKey, sd); },
+                  LiteralSerializationPolicy::kToDebugTypeString)
+            : SerializationOptions(LiteralSerializationPolicy::kToDebugTypeString);
+        return requestShapifier->makeTelemetryKey(serializationOpts, opCtx);
+    }
+
+    // TODO SERVER-73152 remove all special aggregation logic below
+    // The telemetry key for agg queries is of the following form:
+    // { "agg": {...}, "namespace": "...", "applicationName": "...", ... }
+    //
+    // The part of the key we need to apply hmac to is the object in the <command type> element. In
+    // the case of an aggregate() command, it will look something like:
+    // > "pipeline" : [ { "$telemetry" : {} },
+    //   { "$addFields" : { "x" : { "$someExpr" {} } } } ],
+    // We should preserve the top-level stage names in the pipeline but apply hmac to all field
+    // names of children.
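+    // For instance, applying hmac to the example above would keep the stage names
+    // "$telemetry" and "$addFields" intact while hashing the user-supplied field name "x",
+    // yielding roughly (hash rendering illustrative):
+    // > "pipeline" : [ { "$telemetry" : {} },
+    //   { "$addFields" : { "<hmac of x>" : { "$someExpr" {} } } } ],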
+ + // TODO: SERVER-73152 literal and field name redaction for aggregate command. + if (!applyHmacToIdentifiers) { + return oldTelemetryKey; + } + BSONObjBuilder hmacAppliedBuilder; + for (BSONElement e : oldTelemetryKey) { + if ((e.type() == Object || e.type() == Array) && + kKeysToApplyHmac.count(e.fieldNameStringData().toString()) == 1) { + auto hmacApplicator = [&](BSONObjBuilder subObj, const BSONObj& obj) { + for (BSONElement e2 : obj) { + if (e2.type() == Object) { + subObj.append(e2.fieldNameStringData(), + e2.Obj().redact(false, [&](const BSONElement& e) { + return sha256HmacFieldNameHasher(hmacKey, e); + })); + } else { + subObj.append(e2); + } + } + subObj.done(); + }; + + // Now we're inside the :{} entry and want to preserve the top-level field + // names. If it's a [pipeline] array, we redact each element in isolation. + if (e.type() == Object) { + hmacApplicator(hmacAppliedBuilder.subobjStart(e.fieldNameStringData()), e.Obj()); + } else { + BSONObjBuilder subArr = hmacAppliedBuilder.subarrayStart(e.fieldNameStringData()); + for (BSONElement stage : e.Obj()) { + hmacApplicator(subArr.subobjStart(""), stage.Obj()); + } + } + } else { + hmacAppliedBuilder.append(e); + } + } + return hmacAppliedBuilder.obj(); +} + +// The originating command/query does not persist through the end of query execution. In order to +// pair the telemetry metrics that are collected at the end of execution with the original query, it +// is necessary to register the original query during planning and persist it after +// execution. + +// During planning, registerRequest is called to serialize the query shape and context (together, +// the telemetry context) and save it to OpDebug. Moreover, as query execution may span more than +// one request/operation and OpDebug does not persist through cursor iteration, it is necessary to +// communicate the telemetry context across operations. In this way, the telemetry context is +// registered to the cursor, so upon getMore() calls, the cursor manager passes the telemetry key +// from the pinned cursor to the new OpDebug. + +// Once query execution is complete, the telemetry context is grabbed from OpDebug, a telemetry key +// is generated from this and metrics are paired to this key in the telemetry store. +void registerAggRequest(const AggregateCommandRequest& request, OperationContext* opCtx) { + if (!isTelemetryEnabled(opCtx->getServiceContext())) { + return; + } + + // Queries against metadata collections should never appear in telemetry data. 
+    if (request.getNamespace().isFLE2StateCollection()) {
+        return;
+    }
+
+    if (!shouldCollect(opCtx->getServiceContext())) {
+        return;
+    }
+
+    BSONObjBuilder telemetryKey;
+    BSONObjBuilder pipelineBuilder = telemetryKey.subarrayStart("pipeline"_sd);
+    try {
+        for (auto&& stage : request.getPipeline()) {
+            BSONObjBuilder stageBuilder = pipelineBuilder.subobjStart("stage"_sd);
+            appendWithAbstractedLiterals(stageBuilder, stage.firstElement());
+            stageBuilder.done();
+        }
+        pipelineBuilder.done();
+        telemetryKey.append("namespace", request.getNamespace().toString());
+        if (request.getReadConcern()) {
+            telemetryKey.append("readConcern", *request.getReadConcern());
+        }
+        if (auto metadata = ClientMetadata::get(opCtx->getClient())) {
+            telemetryKey.append("applicationName", metadata->getApplicationName());
+        }
+    } catch (ExceptionFor<ErrorCodes::EncounteredFLEPayloadWhileApplyingHmac>&) {
+        return;
+    }
+
+    BSONObj key = telemetryKey.obj();
+    CurOp::get(opCtx)->debug().telemetryStoreKeyHash = hash(key);
+    CurOp::get(opCtx)->debug().telemetryStoreKey = key.getOwned();
+}
+
+void registerRequest(std::unique_ptr<RequestShapifier> requestShapifier,
+                     const NamespaceString& collection,
+                     OperationContext* opCtx,
+                     const boost::intrusive_ptr<ExpressionContext>& expCtx) {
+    if (!isTelemetryEnabled(opCtx->getServiceContext())) {
+        return;
+    }
+
+    // Queries against metadata collections should never appear in telemetry data.
+    if (collection.isFLE2StateCollection()) {
+        return;
+    }
+
+    if (!shouldCollect(opCtx->getServiceContext())) {
+        return;
+    }
+    SerializationOptions options;
+    options.literalPolicy = LiteralSerializationPolicy::kToDebugTypeString;
+    options.replacementForLiteralArgs = replacementForLiteralArgs;
+    CurOp::get(opCtx)->debug().telemetryStoreKeyHash =
+        hash(requestShapifier->makeTelemetryKey(options, expCtx));
+    CurOp::get(opCtx)->debug().telemetryRequestShapifier = std::move(requestShapifier);
+}
+
+TelemetryStore& getTelemetryStore(OperationContext* opCtx) {
+    uassert(6579000,
+            "Telemetry is not enabled without the feature flag on and a cache size greater than 0 "
+            "bytes",
+            isTelemetryEnabled(opCtx->getServiceContext()));
+    return telemetryStoreDecoration(opCtx->getServiceContext())->getTelemetryStore();
+}
+
+void writeTelemetry(OperationContext* opCtx,
+                    boost::optional<std::size_t> telemetryKeyHash,
+                    boost::optional<BSONObj> telemetryKey,
+                    std::unique_ptr<RequestShapifier> requestShapifier,
+                    const uint64_t queryExecMicros,
+                    const uint64_t docsReturned) {
+    if (!telemetryKeyHash) {
+        return;
+    }
+    auto&& telemetryStore = getTelemetryStore(opCtx);
+    auto&& [statusWithMetrics, partitionLock] =
+        telemetryStore.getWithPartitionLock(*telemetryKeyHash);
+    std::shared_ptr<TelemetryEntry> metrics;
+    if (statusWithMetrics.isOK()) {
+        metrics = *statusWithMetrics.getValue();
+    } else {
+        BSONObj key = telemetryKey.value_or(BSONObj{});
+        size_t numEvicted =
+            telemetryStore.put(*telemetryKeyHash,
+                               std::make_shared<TelemetryEntry>(
+                                   std::move(requestShapifier), CurOp::get(opCtx)->getNSS(), key),
+                               partitionLock);
+        telemetryEvictedMetric.increment(numEvicted);
+        auto newMetrics = partitionLock->get(*telemetryKeyHash);
+        if (!newMetrics.isOK()) {
+            // This can happen if the budget is immediately exceeded. Specifically, if there is
+            // not enough room for even a single new entry because the number of partitions is
+            // too high relative to the size.
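+            // For example, a store capped at 1 MB but split across 1024 partitions leaves
+            // under 1 KB of budget per partition, which one large telemetry key and its
+            // entry can exceed on their own (numbers illustrative).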
+ telemetryStoreWriteErrorsMetric.increment(); + LOGV2_DEBUG(7560900, + 1, + "Failed to store telemetry entry.", + "status"_attr = newMetrics.getStatus(), + "telemetryKeyHash"_attr = telemetryKeyHash); + return; + } + metrics = newMetrics.getValue()->second; + } + + metrics->lastExecutionMicros = queryExecMicros; + metrics->execCount++; + metrics->queryExecMicros.aggregate(queryExecMicros); + metrics->docsReturned.aggregate(docsReturned); +} +} // namespace telemetry +} // namespace mongo diff --git a/src/mongo/db/query/telemetry.h b/src/mongo/db/query/telemetry.h new file mode 100644 index 00000000000..e7e0f3ccfd1 --- /dev/null +++ b/src/mongo/db/query/telemetry.h @@ -0,0 +1,224 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/base/status.h" +#include "mongo/bson/bsonobj.h" +#include "mongo/db/concurrency/d_concurrency.h" +#include "mongo/db/curop.h" +#include "mongo/db/namespace_string.h" +#include "mongo/db/query/partitioned_cache.h" +#include "mongo/db/query/plan_explainer.h" +#include "mongo/db/query/request_shapifier.h" +#include "mongo/db/query/util/memory_util.h" +#include "mongo/db/service_context.h" +#include +#include + +namespace mongo { + +class OpDebug; +class AggregateCommandRequest; +class FindCommandRequest; + +namespace { +/** + * Type we use to render values to BSON. + */ +using BSONNumeric = long long; +} // namespace + +namespace telemetry { + +/** + * An aggregated metric stores a compressed view of data. It balances the loss of information + * with the reduction in required storage. + */ +struct AggregatedMetric { + + /** + * Aggregate an observed value into the metric. 
+ */
+    void aggregate(uint64_t val) {
+        sum += val;
+        max = std::max(val, max);
+        min = std::min(val, min);
+        sumOfSquares += val * val;
+    }
+
+    void appendTo(BSONObjBuilder& builder, const StringData& fieldName) const {
+        BSONObjBuilder metricsBuilder = builder.subobjStart(fieldName);
+        metricsBuilder.append("sum", (BSONNumeric)sum);
+        metricsBuilder.append("max", (BSONNumeric)max);
+        metricsBuilder.append("min", (BSONNumeric)min);
+        metricsBuilder.append("sumOfSquares", (BSONNumeric)sumOfSquares);
+        metricsBuilder.done();
+    }
+
+    uint64_t sum = 0;
+    // Default to the _signed_ maximum (which fits in unsigned range) because we cast to
+    // BSONNumeric when serializing.
+    uint64_t min = (uint64_t)std::numeric_limits<int64_t>::max();
+    uint64_t max = 0;
+
+    /**
+     * The sum of squares along with (an externally stored) count will allow us to compute the
+     * variance/stddev.
+     */
+    uint64_t sumOfSquares = 0;
+};
+
+extern CounterMetric telemetryStoreSizeEstimateBytesMetric;
+// Used to aggregate the metrics for one telemetry key over all its executions.
+class TelemetryEntry {
+public:
+    TelemetryEntry(std::unique_ptr<RequestShapifier> requestShapifier,
+                   NamespaceStringOrUUID nss,
+                   const BSONObj& cmdObj)
+        : firstSeenTimestamp(Date_t::now().toMillisSinceEpoch() / 1000, 0),
+          requestShapifier(std::move(requestShapifier)),
+          nss(nss),
+          oldTelemetryKey(cmdObj.copy()) {
+        telemetryStoreSizeEstimateBytesMetric.increment(sizeof(TelemetryEntry) + sizeof(BSONObj));
+    }
+
+    ~TelemetryEntry() {
+        telemetryStoreSizeEstimateBytesMetric.decrement(sizeof(TelemetryEntry) + sizeof(BSONObj));
+    }
+
+    BSONObj toBSON() const {
+        BSONObjBuilder builder{sizeof(TelemetryEntry) + 100};
+        builder.append("lastExecutionMicros", (BSONNumeric)lastExecutionMicros);
+        builder.append("execCount", (BSONNumeric)execCount);
+        queryExecMicros.appendTo(builder, "queryExecMicros");
+        docsReturned.appendTo(builder, "docsReturned");
+        builder.append("firstSeenTimestamp", firstSeenTimestamp);
+        return builder.obj();
+    }
+
+    /**
+     * Redact a given telemetry key and set _keySize.
+     */
+    BSONObj computeTelemetryKey(OperationContext* opCtx,
+                                bool applyHmacToIdentifiers,
+                                std::string hmacKey) const;
+
+    /**
+     * Timestamp for when this query shape was added to the store. Set on construction.
+     */
+    const Timestamp firstSeenTimestamp;
+
+    /**
+     * Last execution time in microseconds.
+     */
+    uint64_t lastExecutionMicros = 0;
+
+    /**
+     * Number of query executions.
+     */
+    uint64_t execCount = 0;
+
+    AggregatedMetric queryExecMicros;
+
+    AggregatedMetric docsReturned;
+
+    std::unique_ptr<RequestShapifier> requestShapifier;
+
+    NamespaceStringOrUUID nss;
+
+    // TODO: SERVER-73152 remove oldTelemetryKey when RequestShapifier is used for agg.
+    BSONObj oldTelemetryKey;
+};
+
+struct TelemetryPartitioner {
+    // The partitioning function for use with the 'Partitioned' utility.
+    std::size_t operator()(const std::size_t k, const std::size_t nPartitions) const {
+        return k % nPartitions;
+    }
+};
+
+struct TelemetryStoreEntryBudgetor {
+    size_t operator()(const std::size_t key, const std::shared_ptr<TelemetryEntry>& value) {
+        // The budget estimator for a <key, value> pair in the LRU cache accounts for the size of
+        // the key and the size of the metrics, including the BSON object used for generating the
+        // telemetry key at read time.
+
+        return sizeof(TelemetryEntry) + sizeof(std::size_t) + value->oldTelemetryKey.objsize();
+    }
+};
+
+using TelemetryStore = PartitionedCache<std::size_t,
+                                        std::shared_ptr<TelemetryEntry>,
+                                        TelemetryStoreEntryBudgetor,
+                                        TelemetryPartitioner>;
+
+/**
+ * Acquire a reference to the global telemetry store.
+ */
+TelemetryStore& getTelemetryStore(OperationContext* opCtx);
+
+/**
+ * Register a request for telemetry collection. The telemetry machinery may decide not to
+ * collect anything but this should be called for all requests. The decision is made based on
+ * the feature flag and telemetry parameters such as rate limiting.
+ *
+ * The caller is still responsible for subsequently calling writeTelemetry() once the request is
+ * completed.
+ *
+ * Note that calling this affects internal state. It should be called once for each request for
+ * which telemetry may be collected.
+ * TODO SERVER-73152 remove request-specific registers, leave only registerRequest
+ */
+void registerAggRequest(const AggregateCommandRequest& request, OperationContext* opCtx);
+void registerRequest(std::unique_ptr<RequestShapifier> requestShapifier,
+                     const NamespaceString& collection,
+                     OperationContext* opCtx,
+                     const boost::intrusive_ptr<ExpressionContext>& expCtx);
+
+/**
+ * Writes telemetry to the telemetry store for the operation identified by `telemetryKey`.
+ */
+void writeTelemetry(OperationContext* opCtx,
+                    boost::optional<std::size_t> telemetryKeyHash,
+                    boost::optional<BSONObj> telemetryKey,
+                    std::unique_ptr<RequestShapifier> requestShapifier,
+                    uint64_t queryExecMicros,
+                    uint64_t docsReturned);
+
+/**
+ * Serialize the FindCommandRequest according to the Options passed in. Returns the serialized BSON
+ * with hmac applied to all field names and literals.
+ */
+BSONObj makeTelemetryKey(const FindCommandRequest& findCommand,
+                         const SerializationOptions& opts,
+                         const boost::intrusive_ptr<ExpressionContext>& expCtx,
+                         boost::optional<const TelemetryEntry&> existingMetrics = boost::none);
+} // namespace telemetry
+} // namespace mongo
diff --git a/src/mongo/db/query/telemetry_store_test.cpp b/src/mongo/db/query/telemetry_store_test.cpp
new file mode 100644
index 00000000000..8d68ee566c6
--- /dev/null
+++ b/src/mongo/db/query/telemetry_store_test.cpp
@@ -0,0 +1,1163 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */ + +#include "mongo/bson/simple_bsonobj_comparator.h" +#include "mongo/db/catalog/rename_collection.h" +#include "mongo/db/pipeline/aggregate_request_shapifier.h" +#include "mongo/db/pipeline/expression_context_for_test.h" +#include "mongo/db/query/find_request_shapifier.h" +#include "mongo/db/query/query_feature_flags_gen.h" +#include "mongo/db/query/telemetry.h" +#include "mongo/db/service_context_test_fixture.h" +#include "mongo/idl/server_parameter_test_util.h" +#include "mongo/unittest/inline_auto_update.h" +#include "mongo/unittest/unittest.h" + +namespace mongo::telemetry { +/** + * A default hmac application strategy that generates easy to check results for testing purposes. + */ +std::string applyHmacForTest(StringData s) { + return str::stream() << "HASH<" << s << ">"; +} + +std::size_t hash(const BSONObj& obj) { + return absl::hash_internal::CityHash64(obj.objdata(), obj.objsize()); +} + +class TelemetryStoreTest : public ServiceContextTest { +public: + BSONObj makeTelemetryKeyFindRequest( + FindCommandRequest fcr, + const boost::intrusive_ptr& expCtx, + bool applyHmac = false, + LiteralSerializationPolicy literalPolicy = LiteralSerializationPolicy::kUnchanged) { + FindRequestShapifier findShapifier(fcr, expCtx->opCtx); + + SerializationOptions opts; + if (literalPolicy != LiteralSerializationPolicy::kUnchanged) { + // TODO SERVER-75400 Use only 'literalPolicy.' + opts.replacementForLiteralArgs = "?"; + opts.literalPolicy = literalPolicy; + } + + if (applyHmac) { + opts.applyHmacToIdentifiers = true; + opts.identifierHmacPolicy = applyHmacForTest; + } + return findShapifier.makeTelemetryKey(opts, expCtx); + } +}; + +TEST_F(TelemetryStoreTest, BasicUsage) { + TelemetryStore telStore{5000000, 1000}; + + auto getMetrics = [&](const BSONObj& key) { + auto lookupResult = telStore.lookup(hash(key)); + return *lookupResult.getValue(); + }; + + auto collectMetrics = [&](BSONObj& key) { + std::shared_ptr metrics; + auto lookupResult = telStore.lookup(hash(key)); + if (!lookupResult.isOK()) { + telStore.put(hash(key), + std::make_shared(nullptr, NamespaceString{}, key)); + lookupResult = telStore.lookup(hash(key)); + } + metrics = *lookupResult.getValue(); + metrics->execCount += 1; + metrics->lastExecutionMicros += 123456; + }; + + auto query1 = BSON("query" << 1 << "xEquals" << 42); + // same value, different instance (tests hashing & equality) + auto query1x = BSON("query" << 1 << "xEquals" << 42); + auto query2 = BSON("query" << 2 << "yEquals" << 43); + + collectMetrics(query1); + collectMetrics(query1); + collectMetrics(query1x); + collectMetrics(query2); + + ASSERT_EQ(getMetrics(query1)->execCount, 3); + ASSERT_EQ(getMetrics(query1x)->execCount, 3); + ASSERT_EQ(getMetrics(query2)->execCount, 1); + + auto collectMetricsWithLock = [&](BSONObj& key) { + auto [lookupResult, lock] = telStore.getWithPartitionLock(hash(key)); + auto metrics = *lookupResult.getValue(); + metrics->execCount += 1; + metrics->lastExecutionMicros += 123456; + }; + + collectMetricsWithLock(query1x); + collectMetricsWithLock(query2); + + ASSERT_EQ(getMetrics(query1)->execCount, 4); + ASSERT_EQ(getMetrics(query1x)->execCount, 4); + ASSERT_EQ(getMetrics(query2)->execCount, 2); + + int numKeys = 0; + + telStore.forEach( + [&](std::size_t key, const std::shared_ptr& entry) { numKeys++; }); + + ASSERT_EQ(numKeys, 2); +} + + +TEST_F(TelemetryStoreTest, EvictEntries) { + // This creates a telemetry store with 2 partitions, each with a size of 1200 bytes. 
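+    // Each entry is budgeted at sizeof(std::size_t) + sizeof(TelemetryEntry) +
+    // BSONObj().objsize() bytes (see TelemetryStoreEntryBudgetor), so a 1200-byte partition
+    // only fits a handful of entries and most of the 20 puts below must evict.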
+ const auto cacheSize = 2400; + const auto numPartitions = 2; + TelemetryStore telStore{cacheSize, numPartitions}; + + for (int i = 0; i < 20; i++) { + auto query = BSON("query" + std::to_string(i) << 1 << "xEquals" << 42); + telStore.put(hash(query), + std::make_shared(nullptr, NamespaceString{}, BSONObj{})); + } + int numKeys = 0; + telStore.forEach( + [&](std::size_t key, const std::shared_ptr& entry) { numKeys++; }); + + int entriesPerPartition = (cacheSize / numPartitions) / + (sizeof(std::size_t) + sizeof(TelemetryEntry) + BSONObj().objsize()); + ASSERT_EQ(numKeys, entriesPerPartition * numPartitions); +} + +TEST_F(TelemetryStoreTest, CorrectlyRedactsFindCommandRequestAllFields) { + auto expCtx = make_intrusive(); + FindCommandRequest fcr(NamespaceStringOrUUID(NamespaceString("testDB.testColl"))); + + fcr.setFilter(BSON("a" << 1)); + + auto key = makeTelemetryKeyFindRequest( + fcr, expCtx, true, LiteralSerializationPolicy::kToDebugTypeString); + + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "queryShape": { + "cmdNs": { + "db": "HASH", + "coll": "HASH" + }, + "command": "find", + "filter": { + "HASH": { + "$eq": "?number" + } + } + } + })", + key); + + // Add sort. + fcr.setSort(BSON("sortVal" << 1 << "otherSort" << -1)); + key = makeTelemetryKeyFindRequest( + fcr, expCtx, true, LiteralSerializationPolicy::kToDebugTypeString); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "queryShape": { + "cmdNs": { + "db": "HASH", + "coll": "HASH" + }, + "command": "find", + "filter": { + "HASH": { + "$eq": "?number" + } + }, + "sort": { + "HASH": 1, + "HASH": -1 + } + } + })", + key); + + // Add inclusion projection. + fcr.setProjection(BSON("e" << true << "f" << true)); + key = makeTelemetryKeyFindRequest( + fcr, expCtx, true, LiteralSerializationPolicy::kToDebugTypeString); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "queryShape": { + "cmdNs": { + "db": "HASH", + "coll": "HASH" + }, + "command": "find", + "filter": { + "HASH": { + "$eq": "?number" + } + }, + "projection": { + "HASH": true, + "HASH": true, + "HASH<_id>": true + }, + "sort": { + "HASH": 1, + "HASH": -1 + } + } + })", + key); + + // Add let. + fcr.setLet(BSON("var1" + << "$a" + << "var2" + << "const1")); + key = makeTelemetryKeyFindRequest( + fcr, expCtx, true, LiteralSerializationPolicy::kToDebugTypeString); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "queryShape": { + "cmdNs": { + "db": "HASH", + "coll": "HASH" + }, + "command": "find", + "filter": { + "HASH": { + "$eq": "?number" + } + }, + "let": { + "HASH": "$HASH", + "HASH": "?string" + }, + "projection": { + "HASH": true, + "HASH": true, + "HASH<_id>": true + }, + "sort": { + "HASH": 1, + "HASH": -1 + } + } + })", + key); + + // Add hinting fields. + fcr.setHint(BSON("z" << 1 << "c" << 1)); + fcr.setMax(BSON("z" << 25)); + fcr.setMin(BSON("z" << 80)); + key = makeTelemetryKeyFindRequest( + fcr, expCtx, true, LiteralSerializationPolicy::kToDebugTypeString); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "queryShape": { + "cmdNs": { + "db": "HASH", + "coll": "HASH" + }, + "command": "find", + "filter": { + "HASH": { + "$eq": "?number" + } + }, + "let": { + "HASH": "$HASH", + "HASH": "?string" + }, + "projection": { + "HASH": true, + "HASH": true, + "HASH<_id>": true + }, + "hint": { + "HASH": 1, + "HASH": 1 + }, + "max": { + "HASH": "?" + }, + "min": { + "HASH": "?" + }, + "sort": { + "HASH": 1, + "HASH": -1 + } + } + })", + key); + + // Add the literal redaction fields. 
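+    // (limit, skip, batchSize and maxTimeMS carry user-provided literal values, so they
+    // serialize as "?number" in the key below rather than as their concrete values.)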
+ fcr.setLimit(5); + fcr.setSkip(2); + fcr.setBatchSize(25); + fcr.setMaxTimeMS(1000); + fcr.setNoCursorTimeout(false); + + key = makeTelemetryKeyFindRequest( + fcr, expCtx, true, LiteralSerializationPolicy::kToDebugTypeString); + + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "queryShape": { + "cmdNs": { + "db": "HASH", + "coll": "HASH" + }, + "command": "find", + "filter": { + "HASH": { + "$eq": "?number" + } + }, + "let": { + "HASH": "$HASH", + "HASH": "?string" + }, + "projection": { + "HASH": true, + "HASH": true, + "HASH<_id>": true + }, + "hint": { + "HASH": 1, + "HASH": 1 + }, + "max": { + "HASH": "?" + }, + "min": { + "HASH": "?" + }, + "sort": { + "HASH": 1, + "HASH": -1 + }, + "limit": "?number", + "skip": "?number" + }, + "maxTimeMS": "?number", + "batchSize": "?number" + } + )", + key); + + // Add the fields that shouldn't be hmacApplied. + fcr.setSingleBatch(true); + fcr.setAllowDiskUse(false); + fcr.setAllowPartialResults(true); + fcr.setAllowDiskUse(false); + fcr.setShowRecordId(true); + fcr.setAwaitData(false); + fcr.setMirrored(true); + key = makeTelemetryKeyFindRequest( + fcr, expCtx, true, LiteralSerializationPolicy::kToDebugTypeString); + + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "queryShape": { + "cmdNs": { + "db": "HASH", + "coll": "HASH" + }, + "command": "find", + "filter": { + "HASH": { + "$eq": "?number" + } + }, + "let": { + "HASH": "$HASH", + "HASH": "?string" + }, + "projection": { + "HASH": true, + "HASH": true, + "HASH<_id>": true + }, + "hint": { + "HASH": 1, + "HASH": 1 + }, + "max": { + "HASH": "?" + }, + "min": { + "HASH": "?" + }, + "sort": { + "HASH": 1, + "HASH": -1 + }, + "limit": "?number", + "skip": "?number", + "singleBatch": "?bool", + "allowDiskUse": "?bool", + "showRecordId": "?bool", + "awaitData": "?bool", + "mirrored": "?bool" + }, + "allowPartialResults": true, + "maxTimeMS": "?number", + "batchSize": "?number" + })", + key); + + fcr.setAllowPartialResults(false); + key = makeTelemetryKeyFindRequest( + fcr, expCtx, true, LiteralSerializationPolicy::kToDebugTypeString); + // Make sure that a false allowPartialResults is also accurately captured. + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "queryShape": { + "cmdNs": { + "db": "HASH", + "coll": "HASH" + }, + "command": "find", + "filter": { + "HASH": { + "$eq": "?number" + } + }, + "let": { + "HASH": "$HASH", + "HASH": "?string" + }, + "projection": { + "HASH": true, + "HASH": true, + "HASH<_id>": true + }, + "hint": { + "HASH": 1, + "HASH": 1 + }, + "max": { + "HASH": "?" + }, + "min": { + "HASH": "?" 
+ }, + "sort": { + "HASH": 1, + "HASH": -1 + }, + "limit": "?number", + "skip": "?number", + "singleBatch": "?bool", + "allowDiskUse": "?bool", + "showRecordId": "?bool", + "awaitData": "?bool", + "mirrored": "?bool" + }, + "allowPartialResults": false, + "maxTimeMS": "?number", + "batchSize": "?number" + })", + key); +} + +TEST_F(TelemetryStoreTest, CorrectlyRedactsFindCommandRequestEmptyFields) { + auto expCtx = make_intrusive(); + FindCommandRequest fcr(NamespaceStringOrUUID(NamespaceString("testDB.testColl"))); + FindRequestShapifier findShapifier(fcr, expCtx->opCtx); + fcr.setFilter(BSONObj()); + fcr.setSort(BSONObj()); + fcr.setProjection(BSONObj()); + SerializationOptions opts; + opts.literalPolicy = LiteralSerializationPolicy::kToDebugTypeString; + opts.applyHmacToIdentifiers = true; + opts.identifierHmacPolicy = applyHmacForTest; + + auto hmacApplied = findShapifier.makeTelemetryKey(opts, expCtx); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "queryShape": { + "cmdNs": { + "db": "HASH", + "coll": "HASH" + }, + "command": "find", + "filter": {} + } + })", + hmacApplied); // NOLINT (test auto-update) +} + +TEST_F(TelemetryStoreTest, CorrectlyRedactsHintsWithOptions) { + auto expCtx = make_intrusive(); + FindCommandRequest fcr(NamespaceStringOrUUID(NamespaceString("testDB.testColl"))); + FindRequestShapifier findShapifier(fcr, expCtx->opCtx); + + fcr.setFilter(BSON("b" << 1)); + fcr.setHint(BSON("z" << 1 << "c" << 1)); + fcr.setMax(BSON("z" << 25)); + fcr.setMin(BSON("z" << 80)); + + auto key = makeTelemetryKeyFindRequest( + fcr, expCtx, false, LiteralSerializationPolicy::kToDebugTypeString); + + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "queryShape": { + "cmdNs": { + "db": "testDB", + "coll": "testColl" + }, + "command": "find", + "filter": { + "b": { + "$eq": "?number" + } + }, + "hint": { + "z": 1, + "c": 1 + }, + "max": { + "z": "?" + }, + "min": { + "z": "?" + } + } + })", + key); + // Test with a string hint. Note that this is the internal representation of the string hint + // generated at parse time. + fcr.setHint(BSON("$hint" + << "z")); + + key = makeTelemetryKeyFindRequest( + fcr, expCtx, false, LiteralSerializationPolicy::kToDebugTypeString); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "queryShape": { + "cmdNs": { + "db": "testDB", + "coll": "testColl" + }, + "command": "find", + "filter": { + "b": { + "$eq": "?number" + } + }, + "hint": { + "$hint": "z" + }, + "max": { + "z": "?" + }, + "min": { + "z": "?" + } + } + })", + key); + + fcr.setHint(BSON("z" << 1 << "c" << 1)); + key = makeTelemetryKeyFindRequest(fcr, expCtx, true, LiteralSerializationPolicy::kUnchanged); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "queryShape": { + "cmdNs": { + "db": "HASH", + "coll": "HASH" + }, + "command": "find", + "filter": { + "HASH": { + "$eq": 1 + } + }, + "hint": { + "HASH": 1, + "HASH": 1 + }, + "max": { + "HASH": 25 + }, + "min": { + "HASH": 80 + } + } + })", + key); + + key = makeTelemetryKeyFindRequest( + fcr, expCtx, true, LiteralSerializationPolicy::kToDebugTypeString); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "queryShape": { + "cmdNs": { + "db": "HASH", + "coll": "HASH" + }, + "command": "find", + "filter": { + "HASH": { + "$eq": "?number" + } + }, + "hint": { + "HASH": 1, + "HASH": 1 + }, + "max": { + "HASH": "?" + }, + "min": { + "HASH": "?" + } + } + })", + key); + + // Test that $natural comes through unmodified. 
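+    // ($natural is a special hint operator rather than a user field name, so it is not
+    // hashed the way the "z" and "c" hint fields were above.)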
+ fcr.setHint(BSON("$natural" << -1)); + key = makeTelemetryKeyFindRequest( + fcr, expCtx, true, LiteralSerializationPolicy::kToDebugTypeString); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "queryShape": { + "cmdNs": { + "db": "HASH", + "coll": "HASH" + }, + "command": "find", + "filter": { + "HASH": { + "$eq": "?number" + } + }, + "hint": { + "$natural": -1 + }, + "max": { + "HASH": "?" + }, + "min": { + "HASH": "?" + } + } + })", + key); +} + +TEST_F(TelemetryStoreTest, DefinesLetVariables) { + // Test that the expression context we use to apply hmac will understand the 'let' part of the + // find command while parsing the other pieces of the command. + + // Note that this ExpressionContext will not have the let variables defined - we expect the + // 'makeTelemetryKey' call to do that. + auto opCtx = makeOperationContext(); + FindCommandRequest fcr(NamespaceStringOrUUID(NamespaceString("testDB.testColl"))); + fcr.setLet(BSON("var" << 2)); + fcr.setFilter(fromjson("{$expr: [{$eq: ['$a', '$$var']}]}")); + fcr.setProjection(fromjson("{varIs: '$$var'}")); + + const auto cmdObj = fcr.toBSON(BSON("$db" + << "testDB")); + TelemetryEntry testMetrics{std::make_unique(fcr, opCtx.get()), + fcr.getNamespaceOrUUID(), + cmdObj}; + + bool applyHmacToIdentifiers = false; + auto hmacApplied = + testMetrics.computeTelemetryKey(opCtx.get(), applyHmacToIdentifiers, std::string{}); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "queryShape": { + "cmdNs": { + "db": "testDB", + "coll": "testColl" + }, + "command": "find", + "filter": { + "$expr": [ + { + "$eq": [ + "$a", + "$$var" + ] + } + ] + }, + "let": { + "var": "?number" + }, + "projection": { + "varIs": "$$var", + "_id": true + } + } + })", + hmacApplied); + + // Now be sure hmac is applied to variable names. We don't currently expose a different way to + // do the hashing, so we'll just stick with the big long strings here for now. 
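+    // (Each identifier below is the output of sha256HmacStringDataHasher: a base64-encoded
+    // HMAC-SHA256 digest of the field name, computed here with an empty key so the strings
+    // are stable across test runs.)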
+ applyHmacToIdentifiers = true; + hmacApplied = + testMetrics.computeTelemetryKey(opCtx.get(), applyHmacToIdentifiers, std::string{}); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "queryShape": { + "cmdNs": { + "db": "IyuPUD33jXD1td/VA/JyhbOPYY0MdGkXgdExniXmCyg=", + "coll": "QFhYnXorzWDLwH/wBgpXxp8fkfsZKo4n2cIN/O0uf/c=" + }, + "command": "find", + "filter": { + "$expr": [ + { + "$eq": [ + "$lhWpXUozYRjENbnNVMXoZEq5VrVzqikmJ0oSgLZnRxM=", + "$$adaJc6H3zDirh5/52MLv5yvnb6nXNP15Z4HzGfumvx8=" + ] + } + ] + }, + "let": { + "adaJc6H3zDirh5/52MLv5yvnb6nXNP15Z4HzGfumvx8=": "?number" + }, + "projection": { + "BL649QER7lTs0+8ozTMVNAa6JNjbhf57YT8YQ4EkT1E=": "$$adaJc6H3zDirh5/52MLv5yvnb6nXNP15Z4HzGfumvx8=", + "ljovqLSfuj6o2syO1SynOzHQK1YVij6+Wlx1fL8frUo=": true + } + } + })", + hmacApplied); +} + +TEST_F(TelemetryStoreTest, CorrectlyRedactsAggregateCommandRequestAllFieldsSimplePipeline) { + auto expCtx = make_intrusive(); + AggregateCommandRequest acr(NamespaceString("testDB.testColl")); + auto matchStage = fromjson(R"({ + $match: { + foo: { $in: ["a", "b"] }, + bar: { $gte: { $date: "2022-01-01T00:00:00Z" } } + } + })"); + auto unwindStage = fromjson("{$unwind: '$x'}"); + auto groupStage = fromjson(R"({ + $group: { + _id: "$_id", + c: { $first: "$d.e" }, + f: { $sum: 1 } + } + })"); + auto limitStage = fromjson("{$limit: 10}"); + auto outStage = fromjson(R"({$out: 'outColl'})"); + auto rawPipeline = {matchStage, unwindStage, groupStage, limitStage, outStage}; + acr.setPipeline(rawPipeline); + auto pipeline = Pipeline::parse(rawPipeline, expCtx); + AggregateRequestShapifier aggShapifier(acr, *pipeline, expCtx->opCtx); + + SerializationOptions opts; + opts.literalPolicy = LiteralSerializationPolicy::kUnchanged; + opts.applyHmacToIdentifiers = false; + opts.identifierHmacPolicy = applyHmacForTest; + + auto shapified = aggShapifier.makeTelemetryKey(opts, expCtx); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "queryShape": { + "ns": { + "db": "testDB", + "coll": "testColl" + }, + "aggregate": "testColl", + "pipeline": [ + { + "$match": { + "foo": { + "$in": [ + "a", + "b" + ] + }, + "bar": { + "$gte": {"$date":"2022-01-01T00:00:00.000Z"} + } + } + }, + { + "$unwind": { + "path": "$x" + } + }, + { + "$group": { + "_id": "$_id", + "c": { + "$first": "$d.e" + }, + "f": { + "$sum": { + "$const": 1 + } + } + } + }, + { + "$limit": 10 + }, + { + "$out": { + "coll": "outColl", + "db": "test" + } + } + ] + } + })", + shapified); + + // TODO SERVER-75400 Use only 'literalPolicy.' + opts.replacementForLiteralArgs = "?"; + opts.literalPolicy = LiteralSerializationPolicy::kToDebugTypeString; + opts.applyHmacToIdentifiers = true; + shapified = aggShapifier.makeTelemetryKey(opts, expCtx); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "queryShape": { + "ns": { + "db": "HASH", + "coll": "HASH" + }, + "aggregate": "HASH", + "pipeline": [ + { + "$match": { + "$and": [ + { + "HASH": { + "$in": "?array" + } + }, + { + "HASH": { + "$gte": "?date" + } + } + ] + } + }, + { + "$unwind": { + "path": "$HASH" + } + }, + { + "$group": { + "_id": "$HASH<_id>", + "HASH": { + "$first": "$HASH.HASH" + }, + "HASH": { + "$sum": "?number" + } + } + }, + { + "$limit": "?" + }, + { + "$out": { + "coll": "HASH", + "db": "HASH" + } + } + ] + } + })", + shapified); + + // Add the fields that shouldn't be abstracted. 
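+    // (explain, allowDiskUse and collation describe how the query runs rather than carrying
+    // user data, so they pass through to the key verbatim below.)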
+ acr.setExplain(ExplainOptions::Verbosity::kExecStats); + acr.setAllowDiskUse(false); + acr.setHint(BSON("z" << 1 << "c" << 1)); + acr.setCollation(BSON("locale" + << "simple")); + shapified = aggShapifier.makeTelemetryKey(opts, expCtx); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "queryShape": { + "ns": { + "db": "HASH", + "coll": "HASH" + }, + "aggregate": "HASH", + "pipeline": [ + { + "$match": { + "$and": [ + { + "HASH": { + "$in": "?array" + } + }, + { + "HASH": { + "$gte": "?date" + } + } + ] + } + }, + { + "$unwind": { + "path": "$HASH" + } + }, + { + "$group": { + "_id": "$HASH<_id>", + "HASH": { + "$first": "$HASH.HASH" + }, + "HASH": { + "$sum": "?number" + } + } + }, + { + "$limit": "?" + }, + { + "$out": { + "coll": "HASH", + "db": "HASH" + } + } + ], + "explain": true, + "allowDiskUse": false, + "collation": { + "locale": "simple" + }, + "hint": { + "HASH": 1, + "HASH": 1 + } + } + })", + shapified); + + // Add let. + acr.setLet(BSON("var1" + << "$foo" + << "var2" + << "bar")); + shapified = aggShapifier.makeTelemetryKey(opts, expCtx); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "queryShape": { + "ns": { + "db": "HASH", + "coll": "HASH" + }, + "aggregate": "HASH", + "pipeline": [ + { + "$match": { + "$and": [ + { + "HASH": { + "$in": "?array" + } + }, + { + "HASH": { + "$gte": "?date" + } + } + ] + } + }, + { + "$unwind": { + "path": "$HASH" + } + }, + { + "$group": { + "_id": "$HASH<_id>", + "HASH": { + "$first": "$HASH.HASH" + }, + "HASH": { + "$sum": "?number" + } + } + }, + { + "$limit": "?" + }, + { + "$out": { + "coll": "HASH", + "db": "HASH" + } + } + ], + "explain": true, + "allowDiskUse": false, + "collation": { + "locale": "simple" + }, + "hint": { + "HASH": 1, + "HASH": 1 + }, + "let": { + "HASH": "$HASH", + "HASH": "?string" + } + } + })", + shapified); + + // Add the fields that should be abstracted. + auto cursorOptions = SimpleCursorOptions(); + cursorOptions.setBatchSize(10); + acr.setCursor(cursorOptions); + acr.setMaxTimeMS(500); + acr.setBypassDocumentValidation(true); + expCtx->opCtx->setComment(BSON("comment" + << "note to self")); + shapified = aggShapifier.makeTelemetryKey(opts, expCtx); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "queryShape": { + "ns": { + "db": "HASH", + "coll": "HASH" + }, + "aggregate": "HASH", + "pipeline": [ + { + "$match": { + "$and": [ + { + "HASH": { + "$in": "?array" + } + }, + { + "HASH": { + "$gte": "?date" + } + } + ] + } + }, + { + "$unwind": { + "path": "$HASH" + } + }, + { + "$group": { + "_id": "$HASH<_id>", + "HASH": { + "$first": "$HASH.HASH" + }, + "HASH": { + "$sum": "?number" + } + } + }, + { + "$limit": "?" + }, + { + "$out": { + "coll": "HASH", + "db": "HASH" + } + } + ], + "explain": true, + "allowDiskUse": false, + "collation": { + "locale": "simple" + }, + "hint": { + "HASH": 1, + "HASH": 1 + }, + "let": { + "HASH": "$HASH", + "HASH": "?string" + } + }, + "cursor": { + "batchSize": "?number" + }, + "maxTimeMS": "?number", + "bypassDocumentValidation": "?bool" + })", + shapified); +} +TEST_F(TelemetryStoreTest, CorrectlyRedactsAggregateCommandRequestEmptyFields) { + auto expCtx = make_intrusive(); + AggregateCommandRequest acr(NamespaceString("testDB.testColl")); + acr.setPipeline({}); + auto pipeline = Pipeline::parse({}, expCtx); + AggregateRequestShapifier aggShapifier(acr, *pipeline, expCtx->opCtx); + + SerializationOptions opts; + // TODO SERVER-75400 Use only 'literalPolicy.' 
+ opts.replacementForLiteralArgs = "?"; + opts.literalPolicy = LiteralSerializationPolicy::kToDebugTypeString; + opts.applyHmacToIdentifiers = true; + opts.identifierHmacPolicy = applyHmacForTest; + + auto shapified = aggShapifier.makeTelemetryKey(opts, expCtx); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "queryShape": { + "ns": { + "db": "HASH", + "coll": "HASH" + }, + "aggregate": "HASH", + "pipeline": [] + } + })", + shapified); // NOLINT (test auto-update) +} +} // namespace mongo::telemetry diff --git a/src/mongo/db/query/telemetry_util.cpp b/src/mongo/db/query/telemetry_util.cpp new file mode 100644 index 00000000000..eeaf7da71e6 --- /dev/null +++ b/src/mongo/db/query/telemetry_util.cpp @@ -0,0 +1,97 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/query/telemetry_util.h" + +#include "mongo/base/status.h" +#include "mongo/db/concurrency/d_concurrency.h" +#include "mongo/db/query/partitioned_cache.h" +#include "mongo/db/query/query_knobs_gen.h" +#include "mongo/db/query/util/memory_util.h" +#include "mongo/db/service_context.h" +#include "mongo/logv2/log.h" + + +#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery + +namespace mongo::telemetry_util { + +namespace { +/** + * Given the current 'Client', returns a pointer to the 'ServiceContext' and an interface for + * updating the telemetry store. + */ +std::pair getUpdater(const Client& client) { + auto serviceCtx = client.getServiceContext(); + tassert(7106500, "ServiceContext must be non null", serviceCtx); + + auto updater = telemetryStoreOnParamChangeUpdater(serviceCtx).get(); + tassert(7106501, "Telemetry store size updater must be non null", updater); + return {serviceCtx, updater}; +} +} // namespace + + +Status onTelemetryStoreSizeUpdate(const std::string& str) { + auto newSize = memory_util::MemorySize::parse(str); + if (!newSize.isOK()) { + return newSize.getStatus(); + } + + // The client is nullptr if the parameter is supplied from the command line. In this case, we + // ignore the update event, the parameter will be processed when initializing the service + // context. 
+    if (auto client = Client::getCurrent()) {
+        auto&& [serviceCtx, updater] = getUpdater(*client);
+        updater->updateCacheSize(serviceCtx, newSize.getValue());
+    }
+
+    return Status::OK();
+}
+
+Status validateTelemetryStoreSize(const std::string& str, const boost::optional<TenantId>&) {
+    return memory_util::MemorySize::parse(str).getStatus();
+}
+
+Status onTelemetrySamplingRateUpdate(int samplingRate) {
+    // The client is nullptr if the parameter is supplied from the command line. In this case, we
+    // ignore the update event, the parameter will be processed when initializing the service
+    // context.
+    if (auto client = Client::getCurrent()) {
+        auto&& [serviceCtx, updater] = getUpdater(*client);
+        updater->updateSamplingRate(serviceCtx, samplingRate < 0 ? INT_MAX : samplingRate);
+    }
+
+    return Status::OK();
+}
+
+const Decorable<ServiceContext>::Decoration<std::unique_ptr<OnParamChangeUpdater>>
+    telemetryStoreOnParamChangeUpdater =
+        ServiceContext::declareDecoration<std::unique_ptr<OnParamChangeUpdater>>();
+} // namespace mongo::telemetry_util
diff --git a/src/mongo/db/query/telemetry_util.h b/src/mongo/db/query/telemetry_util.h
new file mode 100644
index 00000000000..c8fc37dc5c4
--- /dev/null
+++ b/src/mongo/db/query/telemetry_util.h
@@ -0,0 +1,93 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/base/status.h"
+#include "mongo/db/concurrency/d_concurrency.h"
+#include "mongo/db/query/partitioned_cache.h"
+#include "mongo/db/query/util/memory_util.h"
+
+
+namespace mongo::telemetry_util {
+
+Status onTelemetryStoreSizeUpdate(const std::string& str);
+
+
+Status validateTelemetryStoreSize(const std::string& str, const boost::optional<TenantId>&);
+
+Status onTelemetrySamplingRateUpdate(int samplingRate);
+
+/**
+ * An interface used to modify the telemetry store when query setParameters are modified. This is
+ * done via an interface decorating the 'ServiceContext' in order to avoid a link-time dependency
+ * of the query knobs library on the telemetry code.
+ */
+class OnParamChangeUpdater {
+public:
+    virtual ~OnParamChangeUpdater() = default;
+
+    /**
+     * Resizes the telemetry store decorating 'serviceCtx' to the new size given by 'memSize'.
diff --git a/src/mongo/s/commands/cluster_find_cmd.h b/src/mongo/s/commands/cluster_find_cmd.h
index 6ab7d513d86..942e0893434 100644
--- a/src/mongo/s/commands/cluster_find_cmd.h
+++ b/src/mongo/s/commands/cluster_find_cmd.h
@@ -39,7 +39,7 @@
 #include "mongo/db/matcher/extensions_callback_noop.h"
 #include "mongo/db/query/cursor_response.h"
 #include "mongo/db/query/find_request_shapifier.h"
-#include "mongo/db/query/query_stats.h"
+#include "mongo/db/query/telemetry.h"
 #include "mongo/db/stats/counters.h"
 #include "mongo/db/views/resolved_view.h"
 #include "mongo/rpc/get_status_from_command_result.h"
@@ -225,11 +225,11 @@ public:
                 MatchExpressionParser::kAllowAllSpecialFeatures));
 
             if (!_didDoFLERewrite) {
-                query_stats::registerRequest(std::make_unique<FindRequestShapifier>(
-                                                 cq->getFindCommandRequest(), opCtx),
-                                             cq->nss(),
-                                             opCtx,
-                                             cq->getExpCtx());
+                telemetry::registerRequest(std::make_unique<FindRequestShapifier>(
+                                               cq->getFindCommandRequest(), opCtx),
+                                           cq->nss(),
+                                           opCtx,
+                                           cq->getExpCtx());
             }
 
             try {
diff --git a/src/mongo/s/query/cluster_aggregate.cpp b/src/mongo/s/query/cluster_aggregate.cpp
index 9fd49e2e004..6c9351efe57 100644
--- a/src/mongo/s/query/cluster_aggregate.cpp
+++ b/src/mongo/s/query/cluster_aggregate.cpp
@@ -56,7 +56,7 @@
 #include "mongo/db/query/explain_common.h"
 #include "mongo/db/query/find_common.h"
 #include "mongo/db/query/fle/server_rewrite.h"
-#include "mongo/db/query/query_stats.h"
+#include "mongo/db/query/telemetry.h"
 #include "mongo/db/timeseries/timeseries_gen.h"
 #include "mongo/db/timeseries/timeseries_options.h"
 #include "mongo/db/views/resolved_view.h"
@@ -324,7 +324,7 @@ Status ClusterAggregate::runAggregate(OperationContext* opCtx,
     auto startsWithDocuments = liteParsedPipeline.startsWithDocuments();
 
     if (!shouldDoFLERewrite) {
-        query_stats::registerAggRequest(request, opCtx);
+        telemetry::registerAggRequest(request, opCtx);
     }
 
     // If the routing table is not already taken by the higher level, fill it now.
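In both hunks, registration is skipped whenever an FLE rewrite occurred, so shapes derived from rewritten encrypted queries never reach the store; and what gets registered is a shapifier rather than a finished key, deferring shapification until an entry is actually written. A toy rendering of those two ideas, with all types invented:

    // Toy sketch: gate registration on the FLE rewrite and defer key building.
    #include <functional>
    #include <memory>
    #include <string>
    #include <utility>

    struct OperationContext {};

    // Invented: produces the store key from the original request only on demand.
    struct RequestShapifier {
        std::function<std::string()> makeKey;
    };

    void registerRequest(OperationContext*, std::unique_ptr<RequestShapifier> s) {
        // Registration just stashes the shapifier; makeKey() runs later, and
        // only if this request is actually sampled into the store.
        (void)s;
    }

    void handleFind(OperationContext* opCtx, bool didDoFLERewrite) {
        if (didDoFLERewrite) {
            return;  // rewritten (encrypted) requests are never recorded
        }
        auto shapifier = std::make_unique<RequestShapifier>();
        shapifier->makeKey = [] { return std::string("shape"); };
        registerRequest(opCtx, std::move(shapifier));
    }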
diff --git a/src/mongo/s/query/cluster_aggregation_planner.cpp b/src/mongo/s/query/cluster_aggregation_planner.cpp
index 8f2c6fcdb19..5aa643c0a85 100644
--- a/src/mongo/s/query/cluster_aggregation_planner.cpp
+++ b/src/mongo/s/query/cluster_aggregation_planner.cpp
@@ -360,16 +360,16 @@ BSONObj establishMergingMongosCursor(OperationContext* opCtx,
     int nShards = ccc->getNumRemotes();
     auto&& opDebug = CurOp::get(opCtx)->debug();
-    // Fill out the aggregation metrics in CurOp, and record queryStats metrics, before detaching
-    // the cursor from its opCtx.
+    // Fill out the aggregation metrics in CurOp, and record telemetry metrics, before detaching the
+    // cursor from its opCtx.
     opDebug.nShards = std::max(opDebug.nShards, nShards);
     opDebug.cursorExhausted = exhausted;
     opDebug.additiveMetrics.nBatches = 1;
     CurOp::get(opCtx)->setEndOfOpMetrics(responseBuilder.numDocs());
     if (exhausted) {
-        collectQueryStatsMongos(opCtx, ccc->getRequestShapifier());
+        collectTelemetryMongos(opCtx, ccc->getRequestShapifier());
     } else {
-        collectQueryStatsMongos(opCtx, ccc);
+        collectTelemetryMongos(opCtx, ccc);
     }
 
     ccc->detachFromOperationContext();
diff --git a/src/mongo/s/query/cluster_client_cursor.h b/src/mongo/s/query/cluster_client_cursor.h
index 008bacd5ef6..1f0d9be54a7 100644
--- a/src/mongo/s/query/cluster_client_cursor.h
+++ b/src/mongo/s/query/cluster_client_cursor.h
@@ -270,11 +270,11 @@ public:
      * Returns and releases ownership of the RequestShapifier associated with the request this
      * cursor is handling.
      */
-    virtual std::unique_ptr<query_stats::RequestShapifier> getRequestShapifier() = 0;
+    virtual std::unique_ptr<telemetry::RequestShapifier> getRequestShapifier() = 0;
 
 protected:
     // Metrics that are accumulated over the lifetime of the cursor, incremented with each getMore.
-    // Useful for diagnostics like queryStats.
+    // Useful for diagnostics like telemetry.
     OpDebug::AdditiveMetrics _metrics;
 
 private:
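The exhausted/not-exhausted branch above is the core of mongos-side collection: when the merging cursor dies with the response, its metrics are flushed to the store right away, while a surviving cursor keeps accumulating across getMores. A compressed stand-alone illustration with stand-in types, not the server's actual classes:

    #include <memory>
    #include <utility>

    struct Metrics {
        long long execMicros = 0;
        long long nReturned = 0;
        void add(const Metrics& m) {
            execMicros += m.execMicros;
            nReturned += m.nReturned;
        }
    };

    struct RequestShapifier {};  // stand-in: builds the store key on demand

    void writeStoreEntry(std::unique_ptr<RequestShapifier>, const Metrics&) {}

    struct Cursor {
        Metrics lifetime;  // grows with every batch served
        std::unique_ptr<RequestShapifier> shapifier;
    };

    void collectAfterBatch(Cursor& cursor, const Metrics& thisOp, bool exhausted) {
        cursor.lifetime.add(thisOp);
        if (exhausted) {
            // No more getMores can arrive: flush one store entry now.
            writeStoreEntry(std::move(cursor.shapifier), cursor.lifetime);
        }
        // Otherwise the cursor stays registered and the flush happens at kill().
    }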
diff --git a/src/mongo/s/query/cluster_client_cursor_impl.cpp b/src/mongo/s/query/cluster_client_cursor_impl.cpp
index 9da06d36881..939637d0f32 100644
--- a/src/mongo/s/query/cluster_client_cursor_impl.cpp
+++ b/src/mongo/s/query/cluster_client_cursor_impl.cpp
@@ -32,7 +32,7 @@
 #include <memory>
 
 #include "mongo/db/curop.h"
-#include "mongo/db/query/query_stats.h"
+#include "mongo/db/query/telemetry.h"
 #include "mongo/logv2/log.h"
 #include "mongo/s/query/router_stage_limit.h"
 #include "mongo/s/query/router_stage_merge.h"
@@ -75,10 +75,9 @@ ClusterClientCursorImpl::ClusterClientCursorImpl(OperationContext* opCtx,
       _lastUseDate(_createdDate),
       _queryHash(CurOp::get(opCtx)->debug().queryHash),
       _shouldOmitDiagnosticInformation(CurOp::get(opCtx)->debug().shouldOmitDiagnosticInformation),
-      _queryStatsStoreKeyHash(CurOp::get(opCtx)->debug().queryStatsStoreKeyHash),
-      _queryStatsStoreKey(CurOp::get(opCtx)->debug().queryStatsStoreKey),
-      _queryStatsRequestShapifier(
-          std::move(CurOp::get(opCtx)->debug().queryStatsRequestShapifier)) {
+      _telemetryStoreKeyHash(CurOp::get(opCtx)->debug().telemetryStoreKeyHash),
+      _telemetryStoreKey(CurOp::get(opCtx)->debug().telemetryStoreKey),
+      _telemetryRequestShapifier(std::move(CurOp::get(opCtx)->debug().telemetryRequestShapifier)) {
     dassert(!_params.compareWholeSortKeyOnRouter ||
             SimpleBSONObjComparator::kInstance.evaluate(
                 _params.sortToApplyOnRouter == AsyncResultsMerger::kWholeSortKeySortPattern));
@@ -138,13 +137,13 @@ void ClusterClientCursorImpl::kill(OperationContext* opCtx) {
             "Cannot kill a cluster client cursor that has already been killed",
             !_hasBeenKilled);
 
-    if (_queryStatsStoreKeyHash && opCtx) {
-        query_stats::writeQueryStats(opCtx,
-                                     _queryStatsStoreKeyHash,
-                                     _queryStatsStoreKey,
-                                     std::move(_queryStatsRequestShapifier),
-                                     _metrics.executionTime.value_or(Microseconds{0}).count(),
-                                     _metrics.nreturned.value_or(0));
+    if (_telemetryStoreKeyHash && opCtx) {
+        telemetry::writeTelemetry(opCtx,
+                                  _telemetryStoreKeyHash,
+                                  _telemetryStoreKey,
+                                  std::move(_telemetryRequestShapifier),
+                                  _metrics.executionTime.value_or(Microseconds{0}).count(),
+                                  _metrics.nreturned.value_or(0));
     }
 
     _root->kill(opCtx);
@@ -286,8 +285,8 @@ bool ClusterClientCursorImpl::shouldOmitDiagnosticInformation() const {
     return _shouldOmitDiagnosticInformation;
 }
 
-std::unique_ptr<query_stats::RequestShapifier> ClusterClientCursorImpl::getRequestShapifier() {
-    return std::move(_queryStatsRequestShapifier);
+std::unique_ptr<telemetry::RequestShapifier> ClusterClientCursorImpl::getRequestShapifier() {
+    return std::move(_telemetryRequestShapifier);
 }
 
 }  // namespace mongo
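kill() is the single flush point for a registered cluster cursor: the key hash, key, and shapifier captured from CurOp at construction ride along until the cursor dies, and missing metrics default to zero. A minimal sketch of that hand-off, using invented types in place of the real signatures shown above:

    #include <chrono>
    #include <cstddef>
    #include <memory>
    #include <optional>
    #include <utility>

    struct RequestShapifier {};  // built at registration, consumed at write time

    // Pending state captured from the operation that opened the cursor.
    struct PendingTelemetry {
        std::optional<std::size_t> keyHash;  // nullopt: nothing to record
        std::unique_ptr<RequestShapifier> shapifier;
    };

    class CursorTelemetry {
    public:
        explicit CursorTelemetry(PendingTelemetry p) : _pending(std::move(p)) {}

        void onKill(long long execMicros, long long nReturned) {
            if (!_pending.keyHash) {
                return;  // this cursor opted out of recording
            }
            write(*_pending.keyHash, std::move(_pending.shapifier), execMicros, nReturned);
            _pending.keyHash.reset();  // guard against a double flush
        }

    private:
        static void write(std::size_t, std::unique_ptr<RequestShapifier>, long long, long long) {}
        PendingTelemetry _pending;
    };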
diff --git a/src/mongo/s/query/cluster_client_cursor_impl.h b/src/mongo/s/query/cluster_client_cursor_impl.h
index 9d9168d6afb..ecb7535715c 100644
--- a/src/mongo/s/query/cluster_client_cursor_impl.h
+++ b/src/mongo/s/query/cluster_client_cursor_impl.h
@@ -121,7 +121,7 @@ public:
 
     bool shouldOmitDiagnosticInformation() const final;
 
-    std::unique_ptr<query_stats::RequestShapifier> getRequestShapifier() final;
+    std::unique_ptr<telemetry::RequestShapifier> getRequestShapifier() final;
 
 public:
     /**
@@ -186,12 +186,12 @@ private:
     bool _shouldOmitDiagnosticInformation = false;
 
     // If boost::none, telemetry should not be collected for this cursor.
-    boost::optional<std::size_t> _queryStatsStoreKeyHash;
-    // TODO: SERVER-73152 remove queryStatsStoreKey when RequestShapifier is used for agg.
-    boost::optional<BSONObj> _queryStatsStoreKey;
+    boost::optional<std::size_t> _telemetryStoreKeyHash;
+    // TODO: SERVER-73152 remove telemetryStoreKey when RequestShapifier is used for agg.
+    boost::optional<BSONObj> _telemetryStoreKey;
     // The RequestShapifier used by telemetry to shapify the request payload into the telemetry
     // store key.
-    std::unique_ptr<query_stats::RequestShapifier> _queryStatsRequestShapifier;
+    std::unique_ptr<telemetry::RequestShapifier> _telemetryRequestShapifier;
 
     // Tracks if kill() has been called on the cursor. Multiple calls to kill() is an error.
     bool _hasBeenKilled = false;
diff --git a/src/mongo/s/query/cluster_client_cursor_mock.cpp b/src/mongo/s/query/cluster_client_cursor_mock.cpp
index 1e8b3561f5c..e495227b704 100644
--- a/src/mongo/s/query/cluster_client_cursor_mock.cpp
+++ b/src/mongo/s/query/cluster_client_cursor_mock.cpp
@@ -170,7 +170,7 @@ bool ClusterClientCursorMock::shouldOmitDiagnosticInformation() const {
     return false;
 }
 
-std::unique_ptr<query_stats::RequestShapifier> ClusterClientCursorMock::getRequestShapifier() {
+std::unique_ptr<telemetry::RequestShapifier> ClusterClientCursorMock::getRequestShapifier() {
     return nullptr;
 }
 
diff --git a/src/mongo/s/query/cluster_client_cursor_mock.h b/src/mongo/s/query/cluster_client_cursor_mock.h
index 750a67abdde..131ca234287 100644
--- a/src/mongo/s/query/cluster_client_cursor_mock.h
+++ b/src/mongo/s/query/cluster_client_cursor_mock.h
@@ -121,7 +121,7 @@ public:
 
     bool shouldOmitDiagnosticInformation() const final;
 
-    std::unique_ptr<query_stats::RequestShapifier> getRequestShapifier() final;
+    std::unique_ptr<telemetry::RequestShapifier> getRequestShapifier() final;
 
 private:
     bool _killed = false;
diff --git a/src/mongo/s/query/cluster_cursor_manager.cpp b/src/mongo/s/query/cluster_cursor_manager.cpp
index 68436d25c6e..d8e47e55ecf 100644
--- a/src/mongo/s/query/cluster_cursor_manager.cpp
+++ b/src/mongo/s/query/cluster_cursor_manager.cpp
@@ -38,7 +38,7 @@
 #include "mongo/db/allocate_cursor_id.h"
 #include "mongo/db/curop.h"
 #include "mongo/db/query/query_knobs_gen.h"
-#include "mongo/db/query/query_stats.h"
+#include "mongo/db/query/telemetry.h"
 #include "mongo/db/session/kill_sessions_common.h"
 #include "mongo/db/session/logical_session_cache.h"
 #include "mongo/logv2/log.h"
@@ -591,25 +591,25 @@ StatusWith<ClusterClientCursorGuard> ClusterCursorManager::_detachCursor(WithLock,
     return std::move(cursor);
 }
 
-void collectQueryStatsMongos(OperationContext* opCtx,
-                             std::unique_ptr<query_stats::RequestShapifier> requestShapifier) {
+void collectTelemetryMongos(OperationContext* opCtx,
+                            std::unique_ptr<telemetry::RequestShapifier> requestShapifier) {
     // If we haven't registered a cursor to prepare for getMore requests, we record
-    // queryStats directly.
+    // telemetry directly.
     auto&& opDebug = CurOp::get(opCtx)->debug();
-    query_stats::writeQueryStats(
+    telemetry::writeTelemetry(
         opCtx,
-        opDebug.queryStatsStoreKeyHash,
-        opDebug.queryStatsStoreKey,
+        opDebug.telemetryStoreKeyHash,
+        opDebug.telemetryStoreKey,
         std::move(requestShapifier),
         opDebug.additiveMetrics.executionTime.value_or(Microseconds{0}).count(),
         opDebug.additiveMetrics.nreturned.value_or(0));
 }
 
-void collectQueryStatsMongos(OperationContext* opCtx, ClusterClientCursorGuard& cursor) {
+void collectTelemetryMongos(OperationContext* opCtx, ClusterClientCursorGuard& cursor) {
     cursor->incrementCursorMetrics(CurOp::get(opCtx)->debug().additiveMetrics);
 }
 
-void collectQueryStatsMongos(OperationContext* opCtx, ClusterCursorManager::PinnedCursor& cursor) {
+void collectTelemetryMongos(OperationContext* opCtx, ClusterCursorManager::PinnedCursor& cursor) {
     cursor->incrementCursorMetrics(CurOp::get(opCtx)->debug().additiveMetrics);
 }
 
diff --git a/src/mongo/s/query/cluster_cursor_manager.h b/src/mongo/s/query/cluster_cursor_manager.h
index b10824baf09..219dd773f82 100644
--- a/src/mongo/s/query/cluster_cursor_manager.h
+++ b/src/mongo/s/query/cluster_cursor_manager.h
@@ -600,7 +600,7 @@ private:
 };
 
 /**
- * Record metrics for the current operation on opDebug and aggregates those metrics for queryStats
+ * Record metrics for the current operation on opDebug and aggregates those metrics for telemetry
  * use. If a cursor is provided (via ClusterClientCursorGuard or
 * ClusterCursorManager::PinnedCursor), metrics are aggregated on the cursor; otherwise, metrics are
 * written directly to the telemetry store.
@@ -610,9 +610,9 @@ private:
 * Currently, telemetry is only collected for find and aggregate requests (and their subsequent
 * getMore requests), so these should only be called from those request paths.
 */
-void collectQueryStatsMongos(OperationContext* opCtx,
-                             std::unique_ptr<query_stats::RequestShapifier> requestShapifier);
-void collectQueryStatsMongos(OperationContext* opCtx, ClusterClientCursorGuard& cursor);
-void collectQueryStatsMongos(OperationContext* opCtx, ClusterCursorManager::PinnedCursor& cursor);
+void collectTelemetryMongos(OperationContext* opCtx,
+                            std::unique_ptr<telemetry::RequestShapifier> requestShapifier);
+void collectTelemetryMongos(OperationContext* opCtx, ClusterClientCursorGuard& cursor);
+void collectTelemetryMongos(OperationContext* opCtx, ClusterCursorManager::PinnedCursor& cursor);
 
 }  // namespace mongo
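The header comment spells out the rule the three overloads implement: write to the store immediately only when no cursor outlives the operation, otherwise fold the operation's metrics into the cursor. In miniature, with invented types, the call sites select an overload roughly like this:

    #include <memory>

    struct OperationContext {};
    struct RequestShapifier {};
    struct CursorGuard {};   // freshly built cursor, not yet registered
    struct PinnedCursor {};  // registered cursor checked out for a getMore

    // Exhausted on the first batch: no cursor survives, write the entry now.
    void collect(OperationContext*, std::unique_ptr<RequestShapifier>) {}

    // Cursor outlives the op: accumulate this op's metrics on the cursor instead.
    void collect(OperationContext*, CursorGuard&) {}
    void collect(OperationContext*, PinnedCursor&) {}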
diff --git a/src/mongo/s/query/cluster_find.cpp b/src/mongo/s/query/cluster_find.cpp
index d0bd48a0d51..5b340ec098a 100644
--- a/src/mongo/s/query/cluster_find.cpp
+++ b/src/mongo/s/query/cluster_find.cpp
@@ -48,7 +48,7 @@
 #include "mongo/db/query/find_common.h"
 #include "mongo/db/query/getmore_command_gen.h"
 #include "mongo/db/query/query_planner_common.h"
-#include "mongo/db/query/query_stats.h"
+#include "mongo/db/query/telemetry.h"
 #include "mongo/executor/task_executor_pool.h"
 #include "mongo/logv2/log.h"
 #include "mongo/platform/overflow_arithmetic.h"
@@ -444,7 +444,7 @@ CursorId runQueryWithoutRetrying(OperationContext* opCtx,
         if (shardIds.size() > 0) {
             updateNumHostsTargetedMetrics(opCtx, cm, shardIds.size());
         }
-        collectQueryStatsMongos(opCtx, ccc->getRequestShapifier());
+        collectTelemetryMongos(opCtx, ccc->getRequestShapifier());
         return CursorId(0);
     }
 
@@ -455,7 +455,7 @@ CursorId runQueryWithoutRetrying(OperationContext* opCtx,
                               ? ClusterCursorManager::CursorLifetime::Immortal
                               : ClusterCursorManager::CursorLifetime::Mortal;
     auto authUser = AuthorizationSession::get(opCtx->getClient())->getAuthenticatedUserName();
 
-    collectQueryStatsMongos(opCtx, ccc);
+    collectTelemetryMongos(opCtx, ccc);
 
     auto cursorId = uassertStatusOK(cursorManager->registerCursor(
         opCtx, ccc.releaseCursor(), query.nss(), cursorType, cursorLifetime, authUser));
@@ -923,7 +923,7 @@ StatusWith<CursorResponse> ClusterFind::runGetMore(OperationContext* opCtx,
     const bool partialResultsReturned = pinnedCursor.getValue()->partialResultsReturned();
     pinnedCursor.getValue()->setLeftoverMaxTimeMicros(opCtx->getRemainingMaxTimeMicros());
 
-    collectQueryStatsMongos(opCtx, pinnedCursor.getValue());
+    collectTelemetryMongos(opCtx, pinnedCursor.getValue());
 
     // Upon successful completion, transfer ownership of the cursor back to the cursor manager. If
     // the cursor has been exhausted, the cursor manager will clean it up for us.
diff --git a/src/mongo/s/query/store_possible_cursor.cpp b/src/mongo/s/query/store_possible_cursor.cpp
index a5c6759f4d1..38cec4024ed 100644
--- a/src/mongo/s/query/store_possible_cursor.cpp
+++ b/src/mongo/s/query/store_possible_cursor.cpp
@@ -98,7 +98,7 @@ StatusWith<BSONObj> storePossibleCursor(OperationContext* opCtx,
 
     if (incomingCursorResponse.getValue().getCursorId() == CursorId(0)) {
         opDebug.cursorExhausted = true;
-        collectQueryStatsMongos(opCtx, std::move(opDebug.queryStatsRequestShapifier));
+        collectTelemetryMongos(opCtx, std::move(opDebug.telemetryRequestShapifier));
         return cmdResult;
     }
 
@@ -130,7 +130,7 @@ StatusWith<BSONObj> storePossibleCursor(OperationContext* opCtx,
     }
 
     auto ccc = ClusterClientCursorImpl::make(opCtx, std::move(executor), std::move(params));
-    collectQueryStatsMongos(opCtx, ccc);
+    collectTelemetryMongos(opCtx, ccc);
 
     // We don't expect to use this cursor until a subsequent getMore, so detach from the current
     // OperationContext until then.
     ccc->detachFromOperationContext();
-- 
cgit v1.2.1