From d2995e8e1ad5091412098e84cac061d96f2879bf Mon Sep 17 00:00:00 2001 From: Blake Oler Date: Mon, 9 Jan 2023 18:37:18 +0000 Subject: SERVER-72598 Introduce server parameter to turn off logging for detailed connection health metrics --- jstests/auth/auth-metrics.js | 210 +++++++++++++-------- jstests/libs/ingress_handshake_metrics_helpers.js | 25 ++- .../connection_establishment_metrics.js | 132 ++++++++----- jstests/noPassthrough/slow_session_workflow_log.js | 16 +- .../ingress_handshake_and_auth_metrics.js | 33 ++-- jstests/ocsp/ocsp_client_verification_logging.js | 183 ++++++++++-------- .../ingress_handshake_and_auth_metrics_mongos.js | 31 +-- jstests/ssl/ssl_ingress_conn_metrics.js | 129 ++++++++----- src/mongo/client/SConscript | 1 + src/mongo/client/async_client.cpp | 25 +-- src/mongo/db/SConscript | 12 ++ src/mongo/db/auth/SConscript | 2 + src/mongo/db/auth/authentication_session.cpp | 47 ++--- src/mongo/db/auth/sasl_mechanism_registry.cpp | 16 +- .../db/auth/sasl_plain_server_conversation.cpp | 16 +- .../db/auth/sasl_scram_server_conversation.cpp | 16 +- .../db/connection_health_metrics_parameter.idl | 40 ++++ src/mongo/executor/connection_pool_tl.cpp | 13 +- src/mongo/executor/network_interface_tl.cpp | 21 ++- src/mongo/shell/shell_options.cpp | 1 + src/mongo/transport/SConscript | 1 + src/mongo/transport/asio/asio_session.cpp | 9 +- src/mongo/transport/ingress_handshake_metrics.cpp | 17 +- src/mongo/transport/session_workflow.cpp | 5 + src/mongo/util/net/SConscript | 1 + src/mongo/util/net/ssl_manager_apple.cpp | 3 +- src/mongo/util/net/ssl_manager_openssl.cpp | 6 +- src/mongo/util/net/ssl_manager_windows.cpp | 3 +- 28 files changed, 663 insertions(+), 351 deletions(-) create mode 100644 src/mongo/db/connection_health_metrics_parameter.idl diff --git a/jstests/auth/auth-metrics.js b/jstests/auth/auth-metrics.js index 68ea5937d50..7c87229906e 100644 --- a/jstests/auth/auth-metrics.js +++ b/jstests/auth/auth-metrics.js @@ -54,7 +54,7 @@ function authnFailureIncrementsServerStatusTotalAuthTime(mongodRunner) { } // Test that a successful authentication is logged correctly -function authnSuccessLogsMetricsReportWithSuccessStatus(mongodRunner) { +function authnSuccessLogsMetricsReportWithSuccessStatus(mongodRunner, connectionHealthLoggingOn) { jsTest.log( "============================ authnSuccessLogsMetricsReportWithSuccessStatus ============================"); @@ -65,44 +65,59 @@ function authnSuccessLogsMetricsReportWithSuccessStatus(mongodRunner) { expectedSuccessLogs += 1; admin.auth('admin', 'pwd'); - assert.soon( - () => checkLog.checkContainsWithCountJson( - mongodRunner, - 5286306, - { - "result": 0, - "metrics": {"conversation_duration": {"summary": [{"step": 1}, {"step": 2}]}} - }, - expectedSuccessLogs, - null, - true), - "Did not find expected 1 successful metric log entries"); + if (connectionHealthLoggingOn) { + assert.soon( + () => checkLog.checkContainsWithCountJson( + mongodRunner, + 5286306, + { + "result": 0, + "metrics": {"conversation_duration": {"summary": [{"step": 1}, {"step": 2}]}} + }, + expectedSuccessLogs, + null, + true), + "Did not find expected 1 successful metric log entries"); + } else { + assert.eq(checkLog.checkContainsWithCountJson(mongodRunner, 5286306, {}), + false, + "Expected not to find metric log entries"); + } + admin.logout(); } // Test that authentication failure is logged correctly -function authnFailureLogsMetricsReportWithFailedStatus(mongodRunner) { +function authnFailureLogsMetricsReportWithFailedStatus(mongodRunner, connectionHealthLoggingOn) { jsTest.log( "============================ authnFailureLogsMetricsReportWithFailedStatus ============================"); const admin = mongodRunner.getDB("admin"); admin.auth('admin', 'wrong'); expectedFailureLogs += 1; - assert.soon( - () => checkLog.checkContainsWithCountJson( - mongodRunner, - 5286307, - {"result": 18, "metrics": {"conversation_duration": {"summary": [{"step": 1}]}}}, - expectedFailureLogs, - null, - true), - "Did not find expected 1 failure metric log entries"); + + if (connectionHealthLoggingOn) { + assert.soon( + () => checkLog.checkContainsWithCountJson( + mongodRunner, + 5286307, + {"result": 18, "metrics": {"conversation_duration": {"summary": [{"step": 1}]}}}, + expectedFailureLogs, + null, + true), + "Did not find expected 1 failure metric log entries"); + } else { + assert.eq(checkContainsWithCountJson(mongodRunner, 5286307, {}), + false, + "Expected not to find failure metric log entries"); + } + admin.logout(); } // Test that multiple successful authentications across the same client produce the correct log // output and do not e.g. combine summaries or results -function multipleAuthnSuccessLogsMultipleCorrectReports(mongodRunner) { +function multipleAuthnSuccessLogsMultipleCorrectReports(mongodRunner, connectionHealthLoggingOn) { jsTest.log( "============================ multipleAuthnSuccessLogsMultipleCorrectReports ============================"); @@ -114,22 +129,28 @@ function multipleAuthnSuccessLogsMultipleCorrectReports(mongodRunner) { expectedSuccessLogs += 2; - assert.soon( - () => checkLog.checkContainsWithCountJson( - mongodRunner, - 5286306, - { - "result": 0, - "metrics": {"conversation_duration": {"summary": [{"step": 1}, {"step": 2}]}} - }, - expectedSuccessLogs, - null, - true), - "Did not find expected 2 successful metric log entries"); + if (connectionHealthLoggingOn) { + assert.soon( + () => checkLog.checkContainsWithCountJson( + mongodRunner, + 5286306, + { + "result": 0, + "metrics": {"conversation_duration": {"summary": [{"step": 1}, {"step": 2}]}} + }, + expectedSuccessLogs, + null, + true), + "Did not find expected 2 successful metric log entries"); + } else { + assert.eq(checkLog.checkContainsWithCountJson(mongodRunner, 5286306, {}), + false, + "Expected not to find successful metric log entries"); + } } // Test that multiple authentication failure is logged correctly -function authnFailureLogsMetricsReportWithFailedStatus(mongodRunner) { +function authnFailureLogsMetricsReportWithFailedStatus(mongodRunner, connectionHealthLoggingOn) { jsTest.log( "============================ authnFailureLogsMetricsReportWithFailedStatus ============================"); @@ -138,21 +159,29 @@ function authnFailureLogsMetricsReportWithFailedStatus(mongodRunner) { admin.auth('admin', 'wrong_2'); expectedFailureLogs += 2; - assert.soon( - () => checkLog.checkContainsWithCountJson( - mongodRunner, - 5286307, - {"result": 18, "metrics": {"conversation_duration": {"summary": [{"step": 1}]}}}, - expectedFailureLogs, - null, - true), - "Did not find expected 2 failure metric log entries"); + + if (connectionHealthLoggingOn) { + assert.soon( + () => checkLog.checkContainsWithCountJson( + mongodRunner, + 5286307, + {"result": 18, "metrics": {"conversation_duration": {"summary": [{"step": 1}]}}}, + expectedFailureLogs, + null, + true), + "Did not find expected 2 failure metric log entries"); + } else { + assert.eq(checkLog.checkContainsWithCountJson(mongodRunner, 5286307, {}), + false, + "Expected not to find failure metric log entries"); + } + admin.logout(); } // Test that multiple mixed authentications across the same client produce the correct log output // and do not e.g. combine summaries or results -function multipleAuthnMixedLogsMultipleCorrectReports(mongodRunner) { +function multipleAuthnMixedLogsMultipleCorrectReports(mongodRunner, connectionHealthLoggingOn) { jsTest.log( "============================ multipleAuthnMixedLogsMultipleCorrectReports ============================"); @@ -163,49 +192,68 @@ function multipleAuthnMixedLogsMultipleCorrectReports(mongodRunner) { expectedSuccessLogs += 1; - assert.soon( - () => checkLog.checkContainsWithCountJson( - mongodRunner, - 5286306, - { - "result": 0, - "metrics": {"conversation_duration": {"summary": [{"step": 1}, {"step": 2}]}} - }, - expectedSuccessLogs, - null, - true), - "Did not find expected 1 successful metric log entries"); + if (connectionHealthLoggingOn) { + assert.soon( + () => checkLog.checkContainsWithCountJson( + mongodRunner, + 5286306, + { + "result": 0, + "metrics": {"conversation_duration": {"summary": [{"step": 1}, {"step": 2}]}} + }, + expectedSuccessLogs, + null, + true), + "Did not find expected 1 successful metric log entries"); + } else { + assert.eq(checkLog.checkContainsWithCountJson(mongodRunner, 5286306, {}), + false, + "Expected not to find successful metric log entries"); + } expectedFailureLogs += 1; admin.auth('admin', 'wrong'); - assert.soon( - () => checkLog.checkContainsWithCountJson( - mongodRunner, - 5286307, - {"result": 18, "metrics": {"conversation_duration": {"summary": [{"step": 1}]}}}, - expectedFailureLogs, - null, - true), - "Did not find expected 1 failure metric log entries"); + if (connectionHealthLoggingOn) { + assert.soon( + () => checkLog.checkContainsWithCountJson( + mongodRunner, + 5286307, + {"result": 18, "metrics": {"conversation_duration": {"summary": [{"step": 1}]}}}, + expectedFailureLogs, + null, + true), + "Did not find expected 1 failure metric log entries"); + } else { + assert.eq(checkLog.checkContainsWithCountJson(mongodRunner, 5286307, {}), + false, + "Expected not to find failure metric log entries"); + } } -const mongod = MongoRunner.runMongod(); +let runTest = (connectionHealthLoggingOn) => { + const mongod = MongoRunner.runMongod( + {setParameter: {enableDetailedConnectionHealthMetricLogLines: connectionHealthLoggingOn}}); -try { - mongod.getDB("admin").createUser( - {user: 'admin', pwd: 'pwd', roles: ['root'], mechanisms: ['SCRAM-SHA-256']}); + try { + mongod.getDB("admin").createUser( + {user: 'admin', pwd: 'pwd', roles: ['root'], mechanisms: ['SCRAM-SHA-256']}); - authnSuccessLogsMetricsReportWithSuccessStatus(mongod); - authnFailureLogsMetricsReportWithFailedStatus(mongod); - multipleAuthnSuccessLogsMultipleCorrectReports(mongod); - multipleAuthnMixedLogsMultipleCorrectReports(mongod); + authnSuccessLogsMetricsReportWithSuccessStatus(mongod, connectionHealthLoggingOn); + authnFailureLogsMetricsReportWithFailedStatus(mongod, connectionHealthLoggingOn); + multipleAuthnSuccessLogsMultipleCorrectReports(mongod, connectionHealthLoggingOn); + multipleAuthnMixedLogsMultipleCorrectReports(mongod, connectionHealthLoggingOn); - authnSuccessIncrementsServerStatusTotalAuthTime(mongod); - authnFailureIncrementsServerStatusTotalAuthTime(mongod); + authnSuccessIncrementsServerStatusTotalAuthTime(mongod, connectionHealthLoggingOn); + authnFailureIncrementsServerStatusTotalAuthTime(mongod, connectionHealthLoggingOn); -} finally { - MongoRunner.stopMongod(mongod); -} + } finally { + MongoRunner.stopMongod(mongod); + } +}; + +// Parameterized on enabling/disabling connection health logging. +runTest(true); +runTest(false); })(); diff --git a/jstests/libs/ingress_handshake_metrics_helpers.js b/jstests/libs/ingress_handshake_metrics_helpers.js index dc1a79aff8c..6d4f39e1478 100644 --- a/jstests/libs/ingress_handshake_metrics_helpers.js +++ b/jstests/libs/ingress_handshake_metrics_helpers.js @@ -12,6 +12,7 @@ function ingressHandshakeMetricsTest(conn, options) { guestCredentials = {user: 'guest', pwd: 'guest'}, dbName = 'test', collectionName = 'test_coll', + connectionHealthLoggingOn, preAuthDelayMillis, postAuthDelayMillis, helloProcessingDelayMillis, @@ -31,6 +32,11 @@ function ingressHandshakeMetricsTest(conn, options) { db.createUser(Object.assign(guestCredentials, {roles: jsTest.readOnlyUserRoles})); db[collectionName].insert({foo: 42}); + + if (!connectionHealthLoggingOn) { + assert.commandWorked(conn.adminCommand( + {setParameter: 1, enableDetailedConnectionHealthMetricLogLines: false})); + } } function performAuthTestConnection() { @@ -85,7 +91,14 @@ function ingressHandshakeMetricsTest(conn, options) { performAuthTestConnection(); - assert(timingLogLineExists, "No matching 'first non-auth command' log line"); + if (connectionHealthLoggingOn) { + assert(timingLogLineExists, "No matching 'first non-auth command' log line"); + } else { + assert.eq( + timingLogLineExists(), + false, + "Found 'first non-auth command log line' despite disabling connection health logging"); + } let metricAfterTest = getTotalTimeToFirstNonAuthCommandMillis(); assert.gte(metricAfterTest - metricBeforeTest, totalDelayMillis); @@ -93,7 +106,15 @@ function ingressHandshakeMetricsTest(conn, options) { function performHelloMetricsTest() { performHelloTestConnection(); - assert(helloCompletedLogLineExists, "No matching 'hello completed' log line"); + + if (connectionHealthLoggingOn) { + assert(helloCompletedLogLineExists, "No matching 'hello completed' log line"); + } else { + assert.eq( + helloCompletedLogLineExists(), + false, + "Found 'hello completed' log line despite disabling conection health logging"); + } } // Setup the test and return the function that will perform the test when called diff --git a/jstests/noPassthrough/connection_establishment_metrics.js b/jstests/noPassthrough/connection_establishment_metrics.js index 1a57dc2a25b..3ef97392b60 100644 --- a/jstests/noPassthrough/connection_establishment_metrics.js +++ b/jstests/noPassthrough/connection_establishment_metrics.js @@ -39,7 +39,8 @@ function validateSlowConnectionLogEntry(entry) { } assert.eq(total, entry.attr.totalTimeMillis); } -function validateLogAndExtractEntry() { + +function validateLogAndExtractEntry(st) { const mongosLog = assert.commandWorked(st.s.adminCommand({getLog: "global"})); let queryLogEntry = null; for (const line of findMatchingLogLines(mongosLog.log, {id: 6496400})) { @@ -52,59 +53,98 @@ function validateLogAndExtractEntry() { return queryLogEntry; } +function validateLogAndExtractCountAndEntry(st) { + const mongosLog = assert.commandWorked(st.s.adminCommand({getLog: "global"})); + let queryLogEntry = null; + let matchingLines = findMatchingLogLines(mongosLog.log, {id: 6496400}); + for (const line of matchingLines) { + let entry = JSON.parse(line); + validateSlowConnectionLogEntry(entry); + if (entry.attr.totalTimeMillis >= kConnectionEstablishmentDelayMillis) { + queryLogEntry = entry; + } + } + return {count: matchingLines.length, entry: queryLogEntry}; +} + const kConnectionEstablishmentDelayMillis = 250; const kDBName = 'TestDB'; const kCollectionName = 'sharded_coll'; const kKeyName = 'foo'; -let st = new ShardingTest({shards: 1}); - -jsTestLog("Setting up the test collection."); - -assert.commandWorked(st.s.adminCommand({enableSharding: kDBName})); -assert.commandWorked( - st.s.adminCommand({shardcollection: `${kDBName}.${kCollectionName}`, key: {[kKeyName]: 1}})); - -let db = st.getDB(kDBName); -assert.commandWorked(db[kCollectionName].insertOne({primaryOnly: true, [kKeyName]: 42})); - -jsTestLog("Activating the delay in connection establishment."); -let connDelayFailPoint = configureFailPoint( - st.s, 'asioTransportLayerDelayConnection', {millis: kConnectionEstablishmentDelayMillis}); -assert.commandWorked(st.s.adminCommand( - {setParameter: 1, slowConnectionThresholdMillis: kConnectionEstablishmentDelayMillis})); -assert.commandWorked( - st.s.adminCommand({dropConnections: 1, hostAndPort: [st.rs0.getPrimary().host]})); - -jsTestLog("Running the query."); - -function runTestQuery(db) { - return startParallelShell( - funWithArgs((host, dbName, collName, keyName) => { - let conn = new Mongo(host); - assert.eq(1, - conn.getDB(dbName) - .getCollection(collName) - .find({primaryOnly: true, [keyName]: 42}) - .itcount()); - }, db.getMongo().host, db.getName(), kCollectionName, kKeyName), null, true); -} -let queryShell = runTestQuery(db); +let runTest = (connectionHealthLoggingOn) => { + let st = new ShardingTest({shards: 1}); + + if (!connectionHealthLoggingOn) { + assert.commandWorked(st.s.adminCommand( + {setParameter: 1, enableDetailedConnectionHealthMetricLogLines: false})); + } + + const initialLogEntryCount = validateLogAndExtractCountAndEntry(st).count; + + jsTestLog("Setting up the test collection."); + + assert.commandWorked(st.s.adminCommand({enableSharding: kDBName})); + assert.commandWorked(st.s.adminCommand( + {shardcollection: `${kDBName}.${kCollectionName}`, key: {[kKeyName]: 1}})); + + let db = st.getDB(kDBName); + assert.commandWorked(db[kCollectionName].insertOne({primaryOnly: true, [kKeyName]: 42})); + + jsTestLog("Activating the delay in connection establishment."); + let connDelayFailPoint = configureFailPoint( + st.s, 'asioTransportLayerDelayConnection', {millis: kConnectionEstablishmentDelayMillis}); + assert.commandWorked(st.s.adminCommand( + {setParameter: 1, slowConnectionThresholdMillis: kConnectionEstablishmentDelayMillis})); + assert.commandWorked( + st.s.adminCommand({dropConnections: 1, hostAndPort: [st.rs0.getPrimary().host]})); + + jsTestLog("Running the query."); + + function runTestQuery(db) { + return startParallelShell( + funWithArgs((host, dbName, collName, keyName) => { + let conn = new Mongo(host); + assert.eq(1, + conn.getDB(dbName) + .getCollection(collName) + .find({primaryOnly: true, [keyName]: 42}) + .itcount()); + }, db.getMongo().host, db.getName(), kCollectionName, kKeyName), null, true); + } + let queryShell = runTestQuery(db); + + if (connectionHealthLoggingOn) { + jsTestLog("Checking the mongos log."); + + assert.soon(() => validateLogAndExtractCountAndEntry(st).entry != null, + "Slow connection establishment log entry not found."); -jsTestLog("Checking the mongos log."); + queryShell(); + connDelayFailPoint.off(); -assert.soon(() => validateLogAndExtractEntry() != null, - "Slow connection establishment log entry not found."); + jsTestLog("Checking the output of serverStatus."); + let queryLogEntry = validateLogAndExtractCountAndEntry(st).entry; + let status = assert.commandWorked(st.s.adminCommand({serverStatus: 1})); + printjson(status); + assert.gte(status.metrics.network.totalEgressConnectionEstablishmentTimeMillis, + queryLogEntry.attr.totalTimeMillis); + } else { + assert.eq(validateLogAndExtractCountAndEntry(st).count, initialLogEntryCount); -queryShell(); -connDelayFailPoint.off(); + queryShell(); + connDelayFailPoint.off(); + + jsTestLog("Checking the output of serverStatus."); + let status = assert.commandWorked(st.s.adminCommand({serverStatus: 1})); + printjson(status); + assert.gte(status.metrics.network.totalEgressConnectionEstablishmentTimeMillis, 0); + } -jsTestLog("Checking the output of serverStatus."); -let queryLogEntry = validateLogAndExtractEntry(); -let status = assert.commandWorked(st.s.adminCommand({serverStatus: 1})); -printjson(status); -assert.gte(status.metrics.network.totalEgressConnectionEstablishmentTimeMillis, - queryLogEntry.attr.totalTimeMillis); + st.stop(); +}; -st.stop(); +// Parameter is connectionHealthLoggingOn == true/false +runTest(true); +runTest(false); })(); diff --git a/jstests/noPassthrough/slow_session_workflow_log.js b/jstests/noPassthrough/slow_session_workflow_log.js index aa4c131ffd2..4a1788a1270 100644 --- a/jstests/noPassthrough/slow_session_workflow_log.js +++ b/jstests/noPassthrough/slow_session_workflow_log.js @@ -52,7 +52,7 @@ function runTest(conn) { coll.find().toArray(); fp.off(); let logAndCount = getSlowLogAndCount(conn); - const slowSessionWorkflowCount = logAndCount.count; + let slowSessionWorkflowCount = logAndCount.count; assert.gt(slowSessionWorkflowCount, prevSlowSessionWorkflowCount, "Expected to find at least one slow SessionWorkflow log."); @@ -70,6 +70,20 @@ function runTest(conn) { assert.gte(sendResponseElapsed, sleepMillisInSendResponse, "The time reported sending a response didn't include the sleep in the failpoint."); + + assert.commandWorked( + conn.adminCommand({setParameter: 1, enableDetailedConnectionHealthMetricLogLines: false})); + assert.commandWorked(conn.adminCommand({clearLog: 'global'})); + + // Wait, then do a query beyond the 100ms threshold. Make sure the slow loop log line does not + // exist this time. + const fp2 = configureFailPoint( + conn, "sessionWorkflowDelaySendMessage", {millis: sleepMillisInSendResponse}); + coll.find().toArray(); + fp2.off(); + logAndCount = getSlowLogAndCount(conn); + slowSessionWorkflowCount = logAndCount.count; + assert.eq(slowSessionWorkflowCount, 0); } // Test standalone. diff --git a/jstests/noPassthroughWithMongod/ingress_handshake_and_auth_metrics.js b/jstests/noPassthroughWithMongod/ingress_handshake_and_auth_metrics.js index a178cceaa09..4c4b3b8ae0a 100644 --- a/jstests/noPassthroughWithMongod/ingress_handshake_and_auth_metrics.js +++ b/jstests/noPassthroughWithMongod/ingress_handshake_and_auth_metrics.js @@ -8,20 +8,27 @@ load('jstests/libs/ingress_handshake_metrics_helpers.js'); -let rootCreds = {user: 'root', pwd: 'root'}; -let conn = MongoRunner.runMongod({auth: ''}); +let runTest = (connectionHealthLoggingOn) => { + let rootCreds = {user: 'root', pwd: 'root'}; + let conn = MongoRunner.runMongod({auth: ''}); -jsTestLog("Setting up users and test data."); -let runTest = ingressHandshakeMetricsTest(conn, { - preAuthDelayMillis: 50, - postAuthDelayMillis: 100, - helloProcessingDelayMillis: 50, - helloResponseDelayMillis: 100, - rootCredentials: rootCreds -}); + jsTestLog("Setting up users and test data."); + let runMetricsTest = ingressHandshakeMetricsTest(conn, { + connectionHealthLoggingOn: connectionHealthLoggingOn, + preAuthDelayMillis: 50, + postAuthDelayMillis: 100, + helloProcessingDelayMillis: 50, + helloResponseDelayMillis: 100, + rootCredentials: rootCreds + }); -jsTestLog("Connecting to mongod and running the test."); -runTest(); + jsTestLog("Connecting to mongod and running the test."); + runMetricsTest(); -MongoRunner.stopMongod(conn, null, rootCreds); + MongoRunner.stopMongod(conn, null, rootCreds); +}; + +// Parameterized on enabling/disabling connection health logging. +runTest(true); +runTest(false); })(); diff --git a/jstests/ocsp/ocsp_client_verification_logging.js b/jstests/ocsp/ocsp_client_verification_logging.js index 9fc857f44de..cd883380e7e 100644 --- a/jstests/ocsp/ocsp_client_verification_logging.js +++ b/jstests/ocsp/ocsp_client_verification_logging.js @@ -13,79 +13,114 @@ if (determineSSLProvider() !== "openssl") { return; } -const ocsp_options = { - sslMode: "requireSSL", - sslPEMKeyFile: OCSP_SERVER_CERT, - sslCAFile: OCSP_CA_PEM, - sslAllowInvalidHostnames: "", - setParameter: { - "failpoint.disableStapling": "{'mode':'alwaysOn'}", - "ocspEnabled": "true", - }, +const mongodOptions = (connectionHealthLoggingOn) => { + return { + sslMode: "requireSSL", + sslPEMKeyFile: OCSP_SERVER_CERT, + sslCAFile: OCSP_CA_PEM, + sslAllowInvalidHostnames: "", + setParameter: { + "failpoint.disableStapling": "{'mode':'alwaysOn'}", + "ocspEnabled": "true", + "enableDetailedConnectionHealthMetricLogLines": connectionHealthLoggingOn + } + }; }; -let mock_ocsp = new MockOCSPServer("", 1); -mock_ocsp.start(); - -let conn = MongoRunner.runMongod(ocsp_options); - -clearRawMongoProgramOutput(); -// The desired log line will be printed by the shell. We run a parallel shell because -// 'rawMongoProgramOutput' will only return logs for subprocesses spawned by the shell. -const runParallelShellSuccess = startParallelShell( - () => { - jsTestLog( - "Established connection with server to test successful certification verification."); - }, - conn.port, - null /*noConnect */, - "--tls", - "--tlsCAFile", - OCSP_CA_PEM, - "--tlsCertificateKeyFile", - OCSP_CLIENT_CERT, - "--tlsAllowInvalidHostnames", - "--verbose", - 1); - -runParallelShellSuccess(); -let output = rawMongoProgramOutput(); -assert.gte(output.search(/"id":6840101/), 0, output); - -mock_ocsp.stop(); - -jsTestLog("Restarting MockOCSPServer with FAULT_REVOKED option"); -mock_ocsp = new MockOCSPServer(FAULT_REVOKED, 1); -mock_ocsp.start(); - -clearRawMongoProgramOutput(); -jsTestLog("Spawning parallel shell that should throw due to revoked OCSP certificate"); -assert.throws(startParallelShell( - () => { - jsTestLog("Something went wrong if we print this!"); - }, - conn.port, - null /*noConnect */, - "--tls", - "--tlsCAFile", - OCSP_CA_PEM, - "--tlsCertificateKeyFile", - OCSP_CLIENT_CERT, - "--tlsAllowInvalidHostnames", - "--verbose", - 1)); - -output = rawMongoProgramOutput(); -// Assert that the shell fails due to certificate being revoked, and we still measure OCSP responder -// latency. -assert.gte(output.search(/OCSPCertificateStatusRevoked/), 0); -assert.gte(output.search(/"id":6840101/), 0); - -MongoRunner.stopMongod(conn); - -// The mongoRunner spawns a new Mongo Object to validate the collections which races -// with the shutdown logic of the mock_ocsp responder on some platforms. We need this -// sleep to make sure that the threads don't interfere with each other. -sleep(1000); -mock_ocsp.stop(); -}()); +let runTest = (options) => { + const {ocspFaultType = "", connectionHealthLoggingOn} = options; + + let mock_ocsp = new MockOCSPServer("", 1); + mock_ocsp.start(); + + let conn = MongoRunner.runMongod(mongodOptions(connectionHealthLoggingOn)); + + let loggingShellArg = + `enableDetailedConnectionHealthMetricLogLines=${connectionHealthLoggingOn}`; + + clearRawMongoProgramOutput(); + // The desired log line will be printed by the shell. We run a parallel shell because + // 'rawMongoProgramOutput' will only return logs for subprocesses spawned by the shell. + let runParallelShellSuccess = startParallelShell( + () => { + jsTestLog( + "Established connection with server to test successful certification verification."); + }, + conn.port, + null /*noConnect */, + "--setShellParameter", + loggingShellArg, + "--tls", + "--tlsCAFile", + OCSP_CA_PEM, + "--tlsCertificateKeyFile", + OCSP_CLIENT_CERT, + "--tlsAllowInvalidHostnames", + "--verbose", + 1); + runParallelShellSuccess(); + + const successOutput = rawMongoProgramOutput(); + let failOutput; + + if (ocspFaultType != "") { + mock_ocsp.stop(); + + jsTestLog(`Restarting MockOCSPServer with ${ocspFaultType} option`); + mock_ocsp = new MockOCSPServer(ocspFaultType, 1); + mock_ocsp.start(); + + clearRawMongoProgramOutput(); + + assert.throws(startParallelShell( + (ocspFaultType) => { + jsTestLog("Something went wrong if we print this! Fault type: " + ocspFaultType); + }, + conn.port, + null /*noConnect */, + "--setShellParameter", + loggingShellArg, + "--tls", + "--tlsCAFile", + OCSP_CA_PEM, + "--tlsCertificateKeyFile", + OCSP_CLIENT_CERT, + "--tlsAllowInvalidHostnames", + "--verbose", + 1)); + + failOutput = rawMongoProgramOutput(); + } + + if (ocspFaultType == FAULT_REVOKED) { + // Assert that the shell fails due to certificate being revoked, and we still measure + // OCSP responder latency if logging is enabled. + assert.gt(failOutput.search(/OCSPCertificateStatusRevoked/), 0); + } + + // This is javascript's string search -- returns the first position of the regex string in + // the source string if there is a match, else returns -1. + if (connectionHealthLoggingOn) { + assert.gte(successOutput.search(/"id":6840101/), 0, successOutput); + } else { + assert.eq(successOutput.search(/"id":6840101/), -1, successOutput); + } + + if (failOutput) { + if (connectionHealthLoggingOn) { + assert.gte(failOutput.search(/"id":6840101/), 0, failOutput); + } else { + assert.eq(failOutput.search(/"id":6840101/), -1, failOutput); + } + } + + MongoRunner.stopMongod(conn); + + sleep(1000); + mock_ocsp.stop(); +}; + +runTest({connectionHealthLoggingOn: true}); +runTest({connectionHealthLoggingOn: false}); +runTest({ocspFaultType: FAULT_REVOKED, connectionHealthLoggingOn: true}); +}()); \ No newline at end of file diff --git a/jstests/sharding/ingress_handshake_and_auth_metrics_mongos.js b/jstests/sharding/ingress_handshake_and_auth_metrics_mongos.js index dcc6f3c00f4..5e2752d5ddc 100644 --- a/jstests/sharding/ingress_handshake_and_auth_metrics_mongos.js +++ b/jstests/sharding/ingress_handshake_and_auth_metrics_mongos.js @@ -8,19 +8,26 @@ load('jstests/libs/ingress_handshake_metrics_helpers.js'); -let st = new ShardingTest({shards: 0, other: {auth: ''}}); -let conn = st.s; +let runTest = (connectionHealthLoggingOn) => { + let st = new ShardingTest({shards: 0, other: {auth: ''}}); + let conn = st.s; -jsTestLog("Setting up users and test data."); -let runTest = ingressHandshakeMetricsTest(conn, { - preAuthDelayMillis: 50, - postAuthDelayMillis: 100, - helloProcessingDelayMillis: 50, - helloResponseDelayMillis: 100 -}); + jsTestLog("Setting up users and test data."); + let runMetricsTest = ingressHandshakeMetricsTest(conn, { + connectionHealthLoggingOn: connectionHealthLoggingOn, + preAuthDelayMillis: 50, + postAuthDelayMillis: 100, + helloProcessingDelayMillis: 50, + helloResponseDelayMillis: 100 + }); -jsTestLog("Connecting to mongos and running the test."); -runTest(); + jsTestLog("Connecting to mongos and running the test."); + runMetricsTest(); -st.stop(); + st.stop(); +}; + +// Parameterized on enabling/disabling connection health logging. +runTest(true); +runTest(false); })(); diff --git a/jstests/ssl/ssl_ingress_conn_metrics.js b/jstests/ssl/ssl_ingress_conn_metrics.js index 3ba574b2967..333d3015a90 100644 --- a/jstests/ssl/ssl_ingress_conn_metrics.js +++ b/jstests/ssl/ssl_ingress_conn_metrics.js @@ -20,14 +20,21 @@ if (determineSSLProvider() === "openssl" && detectDefaultTLSProtocol() !== "TLS1 // openSSL is being used. This can be different on Windows/OSX implementations. let cipherSuite = "TLS_AES_256_GCM_SHA384"; -const tlsOptions = { - tlsMode: "requireTLS", - tlsCertificateKeyFile: "jstests/libs/server.pem", - tlsCAFile: "jstests/libs/ca.pem", - setParameter: {opensslCipherSuiteConfig: cipherSuite}, +const mongodOptions = (connectionHealthLoggingOn) => { + let options = { + tlsMode: "requireTLS", + tlsCertificateKeyFile: "jstests/libs/server.pem", + tlsCAFile: "jstests/libs/ca.pem", + setParameter: { + opensslCipherSuiteConfig: cipherSuite, + enableDetailedConnectionHealthMetricLogLines: connectionHealthLoggingOn + }, + }; + + return options; }; -function testConn() { +function testConn(mongod) { const mongo = runMongoProgram('mongo', '--host', 'localhost', @@ -43,49 +50,71 @@ function testConn() { return mongo === 0; } -jsTestLog("Establishing connection to mongod"); -const mongod = MongoRunner.runMongod(Object.merge(tlsOptions)); -let ssNetworkMetrics = mongod.adminCommand({serverStatus: 1}).metrics.network; -let initialHandshakeTimeMillis = ssNetworkMetrics.totalIngressTLSHandshakeTimeMillis; -jsTestLog(`totalTLSHandshakeTimeMillis: ${initialHandshakeTimeMillis}`); -checkLog.containsJson(mongod, 6723804, {durationMillis: Number(initialHandshakeTimeMillis)}); -assert.commandWorked(mongod.adminCommand({clearLog: 'global'})); -assert.eq(1, ssNetworkMetrics.totalIngressTLSConnections, ssNetworkMetrics); - -// Get the logId that corresponds to the implementation of TLS being used. -let logId; -switch (determineSSLProvider()) { - case "openssl": - logId = 6723801; - break; - case "windows": - logId = 6723802; - // This cipher is chosen to represent the cipher negotiated by Windows Server 2019 by - // default. - cipherSuite = "TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384"; - break; - case "apple": - logId = 6723803; - // We log only the cipher represented as its enum value in this code path. This corresponds - // to the hex value 0xC030 which maps to the cipher suite - // "TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384". This cipher is chosen by OSX 12.1 by default. - cipherSuite = 49200; - break; - default: - assert(false, "Failed to determine that we are using a supported SSL provider"); -} +let runTest = (connectionHealthLoggingOn) => { + jsTestLog("Establishing connection to mongod"); + let mongod = MongoRunner.runMongod(Object.merge(mongodOptions(connectionHealthLoggingOn))); + let ssNetworkMetrics = mongod.adminCommand({serverStatus: 1}).metrics.network; + let initialHandshakeTimeMillis = ssNetworkMetrics.totalIngressTLSHandshakeTimeMillis; + jsTestLog(`totalTLSHandshakeTimeMillis: ${initialHandshakeTimeMillis}`); + + if (connectionHealthLoggingOn) { + checkLog.containsJson( + mongod, 6723804, {durationMillis: Number(initialHandshakeTimeMillis)}); + } else { + assert.eq(checkLog.checkContainsOnceJson(mongod, 6723804, {}), false); + } + + assert.commandWorked(mongod.adminCommand({clearLog: 'global'})); + assert.eq(1, ssNetworkMetrics.totalIngressTLSConnections, ssNetworkMetrics); + + // Get the logId that corresponds to the implementation of TLS being used. + let logId; + switch (determineSSLProvider()) { + case "openssl": + logId = 6723801; + break; + case "windows": + logId = 6723802; + // This cipher is chosen to represent the cipher negotiated by Windows Server 2019 + // by default. + cipherSuite = "TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384"; + break; + case "apple": + logId = 6723803; + // We log only the cipher represented as its enum value in this code path. This + // corresponds to the hex value 0xC030 which maps to the cipher suite + // "TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384". This cipher is chosen by OSX 12.1 by + // default. + cipherSuite = 49200; + break; + default: + assert(false, "Failed to determine that we are using a supported SSL provider"); + } + + // Start a new connection to check that 'durationMicros' is cumulatively measured in server + // status. + assert.soon(() => testConn(mongod), "Couldn't connect to mongod"); + ssNetworkMetrics = mongod.adminCommand({serverStatus: 1}).metrics.network; + let totalTLSHandshakeTimeMillis = ssNetworkMetrics.totalIngressTLSHandshakeTimeMillis; + jsTestLog(`totalTLSHandshakeTimeMillis: ${totalTLSHandshakeTimeMillis}`); + let secondHandshakeDuration = totalTLSHandshakeTimeMillis - initialHandshakeTimeMillis; + + if (connectionHealthLoggingOn) { + checkLog.containsJson(mongod, 6723804, {durationMillis: Number(secondHandshakeDuration)}); + assert.soon(() => checkLog.checkContainsOnceJson(mongod, logId, {"cipher": cipherSuite}), + "failed waiting for log line with negotiated cipher info"); + } else { + assert.eq(checkLog.checkContainsOnceJson(mongod, 6723804, {}), false); + assert.eq(checkLog.checkContainsOnceJson(mongod, logId, {}), false); + } + + assert.gt(totalTLSHandshakeTimeMillis, initialHandshakeTimeMillis); + assert.eq(2, ssNetworkMetrics.totalIngressTLSConnections, ssNetworkMetrics); + + MongoRunner.stopMongod(mongod); +}; -// Start a new connection to check that 'durationMicros' is cumulatively measured in server status. -assert.soon(testConn, "Couldn't connect to mongod"); -ssNetworkMetrics = mongod.adminCommand({serverStatus: 1}).metrics.network; -let totalTLSHandshakeTimeMillis = ssNetworkMetrics.totalIngressTLSHandshakeTimeMillis; -jsTestLog(`totalTLSHandshakeTimeMillis: ${totalTLSHandshakeTimeMillis}`); -let secondHandshakeDuration = totalTLSHandshakeTimeMillis - initialHandshakeTimeMillis; -checkLog.containsJson(mongod, 6723804, {durationMillis: Number(secondHandshakeDuration)}); -assert.soon(() => checkLog.checkContainsOnceJson(mongod, logId, {"cipher": cipherSuite}), - "failed waiting for log line with negotiated cipher info"); -assert.gt(totalTLSHandshakeTimeMillis, initialHandshakeTimeMillis); -assert.eq(2, ssNetworkMetrics.totalIngressTLSConnections, ssNetworkMetrics); - -MongoRunner.stopMongod(mongod); +// Parameterized on turning connection health logging on/off. +runTest(true); +runTest(false); }()); \ No newline at end of file diff --git a/src/mongo/client/SConscript b/src/mongo/client/SConscript index 044f612e57f..c88bb4db28e 100644 --- a/src/mongo/client/SConscript +++ b/src/mongo/client/SConscript @@ -267,6 +267,7 @@ env.Library( ], LIBDEPS_PRIVATE=[ '$BUILD_DIR/mongo/db/commands/test_commands_enabled', + '$BUILD_DIR/mongo/db/connection_health_metrics_parameter', '$BUILD_DIR/mongo/db/server_feature_flags', '$BUILD_DIR/mongo/db/stats/counters', '$BUILD_DIR/mongo/executor/egress_tag_closer_manager', diff --git a/src/mongo/client/async_client.cpp b/src/mongo/client/async_client.cpp index 3d30e173089..0f74acc733e 100644 --- a/src/mongo/client/async_client.cpp +++ b/src/mongo/client/async_client.cpp @@ -41,6 +41,7 @@ #include "mongo/db/auth/sasl_command_constants.h" #include "mongo/db/commands/server_status_metric.h" #include "mongo/db/commands/test_commands_enabled.h" +#include "mongo/db/connection_health_metrics_parameter_gen.h" #include "mongo/db/dbmessage.h" #include "mongo/db/server_feature_flags_gen.h" #include "mongo/db/server_options.h" @@ -315,18 +316,20 @@ Future AsyncDBClient::runCommand( const auto timeElapsedMicros = durationCount(fromConnAcquiredTimer.get()->elapsed()); totalTimeForEgressConnectionAcquiredToWireMicros.increment(timeElapsedMicros); - if (timeElapsedMicros >= 1000 || - MONGO_unlikely(alwaysLogConnAcquisitionToWireTime.shouldFail())) { - // Log slow acquisition times at info level but rate limit it to prevent spamming - // users. - static auto& logSeverity = *new logv2::SeveritySuppressor{ - Seconds{1}, logv2::LogSeverity::Info(), logv2::LogSeverity::Debug(2)}; - LOGV2_DEBUG( - 6496702, - logSeverity().toInt(), - "Acquired connection for remote operation and completed writing to wire", - "durationMicros"_attr = timeElapsedMicros); + + if ((!gEnableDetailedConnectionHealthMetricLogLines || timeElapsedMicros < 1000) && + !MONGO_unlikely(alwaysLogConnAcquisitionToWireTime.shouldFail())) { + return; } + + // Log slow acquisition times at info level but rate limit it to prevent spamming + // users. + static auto& logSeverity = *new logv2::SeveritySuppressor{ + Seconds{1}, logv2::LogSeverity::Info(), logv2::LogSeverity::Debug(2)}; + LOGV2_DEBUG(6496702, + logSeverity().toInt(), + "Acquired connection for remote operation and completed writing to wire", + "durationMicros"_attr = timeElapsedMicros); } }; diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript index d198bd8e5e3..446611ce0d1 100644 --- a/src/mongo/db/SConscript +++ b/src/mongo/db/SConscript @@ -304,6 +304,7 @@ env.Library( LIBDEPS_PRIVATE=[ '$BUILD_DIR/mongo/db/auth/cluster_auth_mode', '$BUILD_DIR/mongo/util/options_parser/options_parser', + 'connection_health_metrics_parameter', 'server_base', ], ) @@ -353,6 +354,17 @@ env.Library( ], ) +env.Library( + target="connection_health_metrics_parameter", + source=[ + "connection_health_metrics_parameter.idl", + ], + LIBDEPS_PRIVATE=[ + '$BUILD_DIR/mongo/db/server_base', + '$BUILD_DIR/mongo/util/options_parser/options_parser', + ], +) + # mongod options env.Library( target="mongod_options", diff --git a/src/mongo/db/auth/SConscript b/src/mongo/db/auth/SConscript index 4a9dc2ab929..b57fcfc5eb7 100644 --- a/src/mongo/db/auth/SConscript +++ b/src/mongo/db/auth/SConscript @@ -62,6 +62,7 @@ env.Library( ], LIBDEPS_PRIVATE=[ '$BUILD_DIR/mongo/db/audit', + '$BUILD_DIR/mongo/db/connection_health_metrics_parameter', '$BUILD_DIR/mongo/db/service_context', '$BUILD_DIR/mongo/db/stats/counters', ], @@ -441,6 +442,7 @@ env.Library( 'user', ], LIBDEPS_PRIVATE=[ + '$BUILD_DIR/mongo/db/connection_health_metrics_parameter', '$BUILD_DIR/mongo/db/server_base', '$BUILD_DIR/mongo/db/service_context', 'cluster_auth_mode', diff --git a/src/mongo/db/auth/authentication_session.cpp b/src/mongo/db/auth/authentication_session.cpp index 55314d50cbc..c4fde9cd285 100644 --- a/src/mongo/db/auth/authentication_session.cpp +++ b/src/mongo/db/auth/authentication_session.cpp @@ -33,6 +33,7 @@ #include "mongo/db/audit.h" #include "mongo/db/auth/authentication_metrics.h" #include "mongo/db/client.h" +#include "mongo/db/connection_health_metrics_parameter_gen.h" #include "mongo/logv2/log.h" #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kAccessControl @@ -339,16 +340,18 @@ void AuthenticationSession::markSuccessful() { BSONObj metrics = _metricsRecorder.capture(); - LOGV2(5286306, - "Successfully authenticated", - "client"_attr = _client->getRemote(), - "isSpeculative"_attr = _isSpeculative, - "isClusterMember"_attr = _isClusterMember, - "mechanism"_attr = _mechName, - "user"_attr = _userName.getUser(), - "db"_attr = _userName.getDB(), - "result"_attr = Status::OK().code(), - "metrics"_attr = metrics); + if (gEnableDetailedConnectionHealthMetricLogLines) { + LOGV2(5286306, + "Successfully authenticated", + "client"_attr = _client->getRemote(), + "isSpeculative"_attr = _isSpeculative, + "isClusterMember"_attr = _isClusterMember, + "mechanism"_attr = _mechName, + "user"_attr = _userName.getUser(), + "db"_attr = _userName.getDB(), + "result"_attr = Status::OK().code(), + "metrics"_attr = metrics); + } } void AuthenticationSession::markFailed(const Status& status) { @@ -363,17 +366,19 @@ void AuthenticationSession::markFailed(const Status& status) { BSONObj metrics = _metricsRecorder.capture(); - LOGV2(5286307, - "Failed to authenticate", - "client"_attr = _client->getRemote(), - "isSpeculative"_attr = _isSpeculative, - "isClusterMember"_attr = _isClusterMember, - "mechanism"_attr = _mechName, - "user"_attr = _userName.getUser(), - "db"_attr = _userName.getDB(), - "error"_attr = status, - "result"_attr = status.code(), - "metrics"_attr = metrics); + if (gEnableDetailedConnectionHealthMetricLogLines) { + LOGV2(5286307, + "Failed to authenticate", + "client"_attr = _client->getRemote(), + "isSpeculative"_attr = _isSpeculative, + "isClusterMember"_attr = _isClusterMember, + "mechanism"_attr = _mechName, + "user"_attr = _userName.getUser(), + "db"_attr = _userName.getDB(), + "error"_attr = status, + "result"_attr = status.code(), + "metrics"_attr = metrics); + } } } // namespace mongo diff --git a/src/mongo/db/auth/sasl_mechanism_registry.cpp b/src/mongo/db/auth/sasl_mechanism_registry.cpp index e3136ddfbe4..fd2f45363be 100644 --- a/src/mongo/db/auth/sasl_mechanism_registry.cpp +++ b/src/mongo/db/auth/sasl_mechanism_registry.cpp @@ -35,6 +35,7 @@ #include "mongo/client/authenticate.h" #include "mongo/db/auth/sasl_options.h" #include "mongo/db/auth/user.h" +#include "mongo/db/connection_health_metrics_parameter_gen.h" #include "mongo/logv2/log.h" #include "mongo/util/exit_code.h" #include "mongo/util/icu.h" @@ -104,12 +105,15 @@ void SASLServerMechanismRegistry::advertiseMechanismNamesForUser(OperationContex UserHandle user; const auto swUser = [&] { - ScopedCallbackTimer timer([&](Microseconds elapsed) { - LOGV2(6788603, - "Auth metrics report", - "metric"_attr = "sasl_acquireUser", - "micros"_attr = elapsed.count()); - }); + if (gEnableDetailedConnectionHealthMetricLogLines) { + ScopedCallbackTimer timer([&](Microseconds elapsed) { + LOGV2(6788603, + "Auth metrics report", + "metric"_attr = "sasl_acquireUser", + "micros"_attr = elapsed.count()); + }); + } + return authManager->acquireUser(opCtx, UserRequest(userName, boost::none)); }(); diff --git a/src/mongo/db/auth/sasl_plain_server_conversation.cpp b/src/mongo/db/auth/sasl_plain_server_conversation.cpp index cf3ed62176a..2a579bb3f5a 100644 --- a/src/mongo/db/auth/sasl_plain_server_conversation.cpp +++ b/src/mongo/db/auth/sasl_plain_server_conversation.cpp @@ -39,6 +39,7 @@ #include "mongo/crypto/mechanism_scram.h" #include "mongo/db/auth/sasl_mechanism_registry.h" #include "mongo/db/auth/user.h" +#include "mongo/db/connection_health_metrics_parameter_gen.h" #include "mongo/logv2/log.h" #include "mongo/util/base64.h" #include "mongo/util/password_digest.h" @@ -129,12 +130,15 @@ StatusWith> SASLPlainServerMechanism::stepImpl( // The authentication database is also the source database for the user. auto swUser = [&]() { - ScopedCallbackTimer timer([&](Microseconds elapsed) { - LOGV2(6788606, - "Auth metrics report", - "metric"_attr = "plain_acquireUser", - "micros"_attr = elapsed.count()); - }); + if (gEnableDetailedConnectionHealthMetricLogLines) { + ScopedCallbackTimer timer([&](Microseconds elapsed) { + LOGV2(6788606, + "Auth metrics report", + "metric"_attr = "plain_acquireUser", + "micros"_attr = elapsed.count()); + }); + } + return authManager->acquireUser(opCtx, getUserRequest()); }(); diff --git a/src/mongo/db/auth/sasl_scram_server_conversation.cpp b/src/mongo/db/auth/sasl_scram_server_conversation.cpp index eb1cbf2287d..109de067ea2 100644 --- a/src/mongo/db/auth/sasl_scram_server_conversation.cpp +++ b/src/mongo/db/auth/sasl_scram_server_conversation.cpp @@ -43,6 +43,7 @@ #include "mongo/db/auth/sasl_mechanism_policies.h" #include "mongo/db/auth/sasl_mechanism_registry.h" #include "mongo/db/auth/sasl_options.h" +#include "mongo/db/connection_health_metrics_parameter_gen.h" #include "mongo/logv2/log.h" #include "mongo/platform/random.h" #include "mongo/util/base64.h" @@ -207,12 +208,15 @@ StatusWith> SaslSCRAMServerMechanism::_fir auto authManager = AuthorizationManager::get(opCtx->getServiceContext()); auto swUser = [&]() { - ScopedCallbackTimer timer([&](Microseconds elapsed) { - LOGV2(6788604, - "Auth metrics report", - "metric"_attr = "acquireUser", - "micros"_attr = elapsed.count()); - }); + if (gEnableDetailedConnectionHealthMetricLogLines) { + ScopedCallbackTimer timer([&](Microseconds elapsed) { + LOGV2(6788604, + "Auth metrics report", + "metric"_attr = "acquireUser", + "micros"_attr = elapsed.count()); + }); + } + return authManager->acquireUser(opCtx, UserRequest(user, boost::none)); }(); diff --git a/src/mongo/db/connection_health_metrics_parameter.idl b/src/mongo/db/connection_health_metrics_parameter.idl new file mode 100644 index 00000000000..79a94753019 --- /dev/null +++ b/src/mongo/db/connection_health_metrics_parameter.idl @@ -0,0 +1,40 @@ + +# Copyright (C) 2023-present MongoDB, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the Server Side Public License, version 1, +# as published by MongoDB, Inc. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Server Side Public License for more details. +# +# You should have received a copy of the Server Side Public License +# along with this program. If not, see +# . +# +# As a special exception, the copyright holders give permission to link the +# code of portions of this program with the OpenSSL library under certain +# conditions as described in each individual source file and distribute +# linked combinations including the program with the OpenSSL library. You +# must comply with the Server Side Public License in all respects for +# all of the code used other than as permitted herein. If you modify file(s) +# with this exception, you may extend this exception to your version of the +# file(s), but you are not obligated to do so. If you do not wish to do so, +# delete this exception statement from your version. If you delete this +# exception statement from all source files in the program, then also delete +# it in the license file. +# + +global: + cpp_namespace: "mongo" + +server_parameters: + enableDetailedConnectionHealthMetricLogLines: + description: >- + Enables detailed log lines related to connection health in a cluster. + set_at: [ startup, runtime ] + cpp_vartype: bool + cpp_varname: "gEnableDetailedConnectionHealthMetricLogLines" + default: true diff --git a/src/mongo/executor/connection_pool_tl.cpp b/src/mongo/executor/connection_pool_tl.cpp index feaf9fd9ae9..b40f096ef4d 100644 --- a/src/mongo/executor/connection_pool_tl.cpp +++ b/src/mongo/executor/connection_pool_tl.cpp @@ -37,6 +37,7 @@ #include "mongo/config.h" #include "mongo/db/auth/authorization_manager.h" #include "mongo/db/commands/server_status_metric.h" +#include "mongo/db/connection_health_metrics_parameter_gen.h" #include "mongo/db/server_feature_flags_gen.h" #include "mongo/executor/network_interface_tl_gen.h" #include "mongo/logv2/log.h" @@ -68,6 +69,10 @@ bool connHealthMetricsEnabled() { return gFeatureFlagConnHealthMetrics.isEnabledAndIgnoreFCV(); } +bool connHealthMetricsLoggingEnabled() { + return gEnableDetailedConnectionHealthMetricLogLines; +} + void logSlowConnection(const HostAndPort& peer, const ConnectionMetrics& connMetrics) { static auto& severitySuppressor = *makeSeveritySuppressor().release(); LOGV2_DEBUG(6496400, @@ -434,14 +439,16 @@ void TLConnection::setup(Milliseconds timeout, SetupCallback cb, std::string ins if (status.isOK()) { if (connHealthMetricsEnabled()) { totalConnectionEstablishmentTime.increment(_connMetrics.total().count()); - if (_connMetrics.total() >= - Milliseconds(gSlowConnectionThresholdMillis.load())) { + if (connHealthMetricsLoggingEnabled() && + _connMetrics.total() >= + Milliseconds(gSlowConnectionThresholdMillis.load())) { logSlowConnection(_peer, _connMetrics); } } handler->promise.emplaceValue(); } else { - if (ErrorCodes::isNetworkTimeoutError(status) && connHealthMetricsEnabled()) { + if (ErrorCodes::isNetworkTimeoutError(status) && connHealthMetricsEnabled() && + connHealthMetricsLoggingEnabled()) { logSlowConnection(_peer, _connMetrics); } LOGV2_DEBUG(22584, diff --git a/src/mongo/executor/network_interface_tl.cpp b/src/mongo/executor/network_interface_tl.cpp index ecb518b32af..73348bb60e7 100644 --- a/src/mongo/executor/network_interface_tl.cpp +++ b/src/mongo/executor/network_interface_tl.cpp @@ -35,6 +35,7 @@ #include "mongo/config.h" #include "mongo/db/auth/validated_tenancy_scope.h" #include "mongo/db/commands/server_status_metric.h" +#include "mongo/db/connection_health_metrics_parameter_gen.h" #include "mongo/db/server_feature_flags_gen.h" #include "mongo/db/server_options.h" #include "mongo/db/wire_version.h" @@ -472,10 +473,12 @@ void NetworkInterfaceTL::CommandStateBase::setTimer() { const auto timeoutCode = requestOnAny.timeoutCode; if (nowVal >= deadline) { connTimeoutWaitTime = stopwatch.elapsed(); - LOGV2(6496501, - "Operation timed out while waiting to acquire connection", - "requestId"_attr = requestOnAny.id, - "duration"_attr = connTimeoutWaitTime); + if (gEnableDetailedConnectionHealthMetricLogLines) { + LOGV2(6496501, + "Operation timed out while waiting to acquire connection", + "requestId"_attr = requestOnAny.id, + "duration"_attr = connTimeoutWaitTime); + } uasserted(timeoutCode, str::stream() << "Remote command timed out while waiting to get a " "connection from the pool, took " @@ -844,10 +847,12 @@ void NetworkInterfaceTL::RequestManager::trySend( if (cmdState->finishLine.arriveStrongly()) { if (swConn.getStatus() == cmdState->requestOnAny.timeoutCode) { cmdState->connTimeoutWaitTime = cmdState->stopwatch.elapsed(); - LOGV2(6496500, - "Operation timed out while waiting to acquire connection", - "requestId"_attr = cmdState->requestOnAny.id, - "duration"_attr = cmdState->connTimeoutWaitTime); + if (gEnableDetailedConnectionHealthMetricLogLines) { + LOGV2(6496500, + "Operation timed out while waiting to acquire connection", + "requestId"_attr = cmdState->requestOnAny.id, + "duration"_attr = cmdState->connTimeoutWaitTime); + } } auto& reactor = cmdState->interface->_reactor; diff --git a/src/mongo/shell/shell_options.cpp b/src/mongo/shell/shell_options.cpp index f22556d8347..4060b157fd7 100644 --- a/src/mongo/shell/shell_options.cpp +++ b/src/mongo/shell/shell_options.cpp @@ -73,6 +73,7 @@ const std::set kSetShellParameterAllowlist = { "ocspEnabled", "skipShellCursorFinalize", "tlsOCSPSlowResponderWarningSecs", + "enableDetailedConnectionHealthMetricLogLines", }; std::string getMongoShellHelp(StringData name, const moe::OptionSection& options) { diff --git a/src/mongo/transport/SConscript b/src/mongo/transport/SConscript index 1f7d42e7491..dcdc16b2595 100644 --- a/src/mongo/transport/SConscript +++ b/src/mongo/transport/SConscript @@ -13,6 +13,7 @@ env.Library( ], LIBDEPS=[ '$BUILD_DIR/mongo/base', + '$BUILD_DIR/mongo/db/connection_health_metrics_parameter', '$BUILD_DIR/mongo/db/service_context', '$BUILD_DIR/mongo/db/wire_version', '$BUILD_DIR/mongo/util/net/ssl_manager', diff --git a/src/mongo/transport/asio/asio_session.cpp b/src/mongo/transport/asio/asio_session.cpp index 4db6a90681c..4aa1a286b93 100644 --- a/src/mongo/transport/asio/asio_session.cpp +++ b/src/mongo/transport/asio/asio_session.cpp @@ -32,6 +32,7 @@ #include "mongo/config.h" #include "mongo/db/commands/server_status_metric.h" +#include "mongo/db/connection_health_metrics_parameter_gen.h" #include "mongo/db/server_feature_flags_gen.h" #include "mongo/logv2/log.h" #include "mongo/transport/asio/asio_utils.h" @@ -733,9 +734,11 @@ Future AsioSession::maybeHandshakeSSLForIngress(const MutableBufferSequenc LOGV2_DEBUG(4908001, 2, "Client connected without SNI extension"); } const auto handshakeDurationMillis = durationCount(startTimer.elapsed()); - LOGV2(6723804, - "Ingress TLS handshake complete", - "durationMillis"_attr = handshakeDurationMillis); + if (gEnableDetailedConnectionHealthMetricLogLines) { + LOGV2(6723804, + "Ingress TLS handshake complete", + "durationMillis"_attr = handshakeDurationMillis); + } totalIngressTLSConnections.increment(1); totalIngressTLSHandshakeTimeMillis.increment(handshakeDurationMillis); if (SSLPeerInfo::forSession(shared_from_this()).subjectName().empty()) { diff --git a/src/mongo/transport/ingress_handshake_metrics.cpp b/src/mongo/transport/ingress_handshake_metrics.cpp index e4072cc0f8b..471c74492e7 100644 --- a/src/mongo/transport/ingress_handshake_metrics.cpp +++ b/src/mongo/transport/ingress_handshake_metrics.cpp @@ -29,6 +29,7 @@ #include "mongo/transport/ingress_handshake_metrics.h" #include "mongo/db/commands/server_status_metric.h" +#include "mongo/db/connection_health_metrics_parameter_gen.h" #include "mongo/db/server_feature_flags_gen.h" #include "mongo/logv2/log.h" #include "mongo/util/time_support.h" @@ -44,6 +45,10 @@ bool connHealthMetricsEnabled() { return gFeatureFlagConnHealthMetrics.isEnabledAndIgnoreFCV(); } +bool connHealthMetricsLoggingEnabled() { + return gEnableDetailedConnectionHealthMetricLogLines; +} + CounterMetric totalTimeToFirstNonAuthCommandMillis("network.totalTimeToFirstNonAuthCommandMillis", connHealthMetricsEnabled); } // namespace @@ -85,9 +90,12 @@ void IngressHandshakeMetrics::onCommandReceived(const Command* command) { auto lastAuthOrStartTicks = _lastHandshakeCommandTicks.value_or(*_sessionStartedTicks); auto elapsedSinceAuth = now - lastAuthOrStartTicks; - LOGV2(6788700, - "Received first command on ingress connection since session start or auth handshake", - "elapsed"_attr = _tickSource->ticksTo(elapsedSinceAuth)); + + if (connHealthMetricsLoggingEnabled()) { + LOGV2(6788700, + "Received first command on ingress connection since session start or auth handshake", + "elapsed"_attr = _tickSource->ticksTo(elapsedSinceAuth)); + } auto elapsedSinceStart = now - *_sessionStartedTicks; totalTimeToFirstNonAuthCommandMillis.increment( @@ -114,7 +122,8 @@ void IngressHandshakeMetrics::onCommandProcessed(const Command* command, void IngressHandshakeMetrics::onResponseSent(Milliseconds processingDuration, Milliseconds sendingDuration) { - if (MONGO_likely(_helloSucceeded || !_helloReceivedTime || !connHealthMetricsEnabled())) + if (MONGO_likely(_helloSucceeded || !_helloReceivedTime) || !connHealthMetricsEnabled() || + !connHealthMetricsLoggingEnabled()) return; LOGV2(6724100, diff --git a/src/mongo/transport/session_workflow.cpp b/src/mongo/transport/session_workflow.cpp index 98b58cf42c8..e5e46a12393 100644 --- a/src/mongo/transport/session_workflow.cpp +++ b/src/mongo/transport/session_workflow.cpp @@ -39,6 +39,7 @@ #include "mongo/config.h" #include "mongo/db/client.h" #include "mongo/db/client_strand.h" +#include "mongo/db/connection_health_metrics_parameter_gen.h" #include "mongo/db/dbmessage.h" #include "mongo/db/query/kill_cursors_gen.h" #include "mongo/db/stats/counters.h" @@ -204,6 +205,10 @@ struct SplitTimerPolicy { BSONObjBuilder bob; splitTimer->appendIntervals(bob); + if (!gEnableDetailedConnectionHealthMetricLogLines) { + return; + } + logv2::LogSeverity severity = sessionWorkflowDelaySendMessage.shouldFail() ? logv2::LogSeverity::Info() : _sep->slowSessionWorkflowLogSeverity(); diff --git a/src/mongo/util/net/SConscript b/src/mongo/util/net/SConscript index 7f1473f0a66..1118fc2b89a 100644 --- a/src/mongo/util/net/SConscript +++ b/src/mongo/util/net/SConscript @@ -123,6 +123,7 @@ if not get_option('ssl') == 'off': '$BUILD_DIR/mongo/base/secure_allocator', '$BUILD_DIR/mongo/crypto/sha_block_${MONGO_CRYPTO}', '$BUILD_DIR/mongo/db/auth/cluster_auth_mode', + '$BUILD_DIR/mongo/db/connection_health_metrics_parameter', '$BUILD_DIR/mongo/db/server_base', '$BUILD_DIR/mongo/db/service_context', '$BUILD_DIR/mongo/util/background_job', diff --git a/src/mongo/util/net/ssl_manager_apple.cpp b/src/mongo/util/net/ssl_manager_apple.cpp index 52323196f7a..815016ccfaf 100644 --- a/src/mongo/util/net/ssl_manager_apple.cpp +++ b/src/mongo/util/net/ssl_manager_apple.cpp @@ -41,6 +41,7 @@ #include "mongo/base/status_with.h" #include "mongo/crypto/sha1_block.h" #include "mongo/crypto/sha256_block.h" +#include "mongo/db/connection_health_metrics_parameter_gen.h" #include "mongo/logv2/log.h" #include "mongo/platform/random.h" #include "mongo/transport/ssl_connection_context.h" @@ -1675,7 +1676,7 @@ Future SSLManagerApple::parseAndValidatePeerCertificate( // The cipher will be presented as a number. ::SSLCipherSuite cipher; uassertOSStatusOK(::SSLGetNegotiatedCipher(ssl, &cipher)); - if (!serverGlobalParams.quiet.load()) { + if (!serverGlobalParams.quiet.load() && gEnableDetailedConnectionHealthMetricLogLines) { LOGV2_INFO(6723803, "Accepted TLS connection from peer", "peerSubjectName"_attr = peerSubjectName, diff --git a/src/mongo/util/net/ssl_manager_openssl.cpp b/src/mongo/util/net/ssl_manager_openssl.cpp index 606dd1e8868..6e2eefebf9b 100644 --- a/src/mongo/util/net/ssl_manager_openssl.cpp +++ b/src/mongo/util/net/ssl_manager_openssl.cpp @@ -47,6 +47,7 @@ #include "mongo/base/secure_allocator.h" #include "mongo/bson/bsonobjbuilder.h" #include "mongo/config.h" +#include "mongo/db/connection_health_metrics_parameter_gen.h" #include "mongo/db/service_context.h" #include "mongo/logv2/log.h" #include "mongo/platform/atomic_word.h" @@ -1023,7 +1024,8 @@ Future dispatchOCSPRequests(SSL_CTX* context, // 2. The last OCSP response returns and the status of the certificate is still // unknown. ScopeGuard logLatencyGuard([requestLatency, purpose]() { - if (purpose != OCSPPurpose::kClientVerify) { + if (purpose != OCSPPurpose::kClientVerify || + !gEnableDetailedConnectionHealthMetricLogLines) { return; } LOGV2_INFO(6840101, @@ -3313,7 +3315,7 @@ Future SSLManagerOpenSSL::parseAndValidatePeerCertificate( // TODO: check optional cipher restriction, using cert. auto peerSubject = getCertificateSubjectX509Name(peerCert.get()); const auto cipher = SSL_get_current_cipher(conn); - if (!serverGlobalParams.quiet.load()) { + if (!serverGlobalParams.quiet.load() && gEnableDetailedConnectionHealthMetricLogLines) { LOGV2_INFO(6723801, "Accepted TLS connection from peer", "peerSubject"_attr = peerSubject, diff --git a/src/mongo/util/net/ssl_manager_windows.cpp b/src/mongo/util/net/ssl_manager_windows.cpp index 760b81b4e07..fadac733864 100644 --- a/src/mongo/util/net/ssl_manager_windows.cpp +++ b/src/mongo/util/net/ssl_manager_windows.cpp @@ -45,6 +45,7 @@ #include "mongo/bson/bsonobjbuilder.h" #include "mongo/bson/util/builder.h" #include "mongo/config.h" +#include "mongo/db/connection_health_metrics_parameter_gen.h" #include "mongo/db/server_options.h" #include "mongo/logv2/log.h" #include "mongo/platform/atomic_word.h" @@ -2067,7 +2068,7 @@ Future SSLManagerWindows::parseAndValidatePeerCertificate( } const auto cipher = std::wstring(cipherInfo.szCipherSuite); - if (!serverGlobalParams.quiet.load()) { + if (!serverGlobalParams.quiet.load() && gEnableDetailedConnectionHealthMetricLogLines) { LOGV2_INFO(6723802, "Accepted TLS connection from peer", "peerSubjectName"_attr = peerSubjectName, -- cgit v1.2.1