summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCharlie Swanson <charlie.swanson@mongodb.com>2023-04-11 13:37:35 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2023-04-11 14:46:01 +0000
commitc0e7ce05c657ffef5da25aa0399737311f3d48dc (patch)
tree5f7648843679d17726c7b540e55734a5063e5425
parenta2722330e5a975b9692e69f744a9769006ff94ea (diff)
downloadmongo-c0e7ce05c657ffef5da25aa0399737311f3d48dc.tar.gz
SERVER-75596 Invoke $telemetry in the background of telemetry suite.
-rw-r--r--buildscripts/resmokeconfig/suites/telemetry_passthrough.yml1
-rw-r--r--buildscripts/resmokelib/testing/hooks/aggregate_metrics_background.py63
-rw-r--r--buildscripts/resmokelib/testing/hooks/background_job.py61
-rw-r--r--buildscripts/resmokelib/testing/hooks/jsfile.py2
-rw-r--r--buildscripts/resmokelib/testing/hooks/run_telemetry.py21
-rw-r--r--jstests/hooks/run_aggregate_metrics_background.js1
-rw-r--r--jstests/hooks/run_telemetry.js117
-rw-r--r--src/mongo/db/matcher/schema/expression_internal_schema_allowed_properties.cpp2
-rw-r--r--src/mongo/db/pipeline/document_source_telemetry.cpp18
-rw-r--r--src/mongo/db/pipeline/field_path.cpp4
-rw-r--r--src/mongo/db/query/telemetry.cpp6
11 files changed, 232 insertions, 64 deletions
diff --git a/buildscripts/resmokeconfig/suites/telemetry_passthrough.yml b/buildscripts/resmokeconfig/suites/telemetry_passthrough.yml
index 1aa2a490a5f..9e471accf67 100644
--- a/buildscripts/resmokeconfig/suites/telemetry_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/telemetry_passthrough.yml
@@ -22,6 +22,7 @@ executor:
- class: ValidateCollections
- class: CleanEveryN
n: 20
+ - class: RunTelemetry
fixture:
class: MongoDFixture
mongod_options:
diff --git a/buildscripts/resmokelib/testing/hooks/aggregate_metrics_background.py b/buildscripts/resmokelib/testing/hooks/aggregate_metrics_background.py
index d392c6ae0b8..b49030ff0f4 100644
--- a/buildscripts/resmokelib/testing/hooks/aggregate_metrics_background.py
+++ b/buildscripts/resmokelib/testing/hooks/aggregate_metrics_background.py
@@ -6,68 +6,17 @@ internally sleep for 1 second between runs.
import os.path
-from buildscripts.resmokelib import errors
-from buildscripts.resmokelib.testing.hooks import jsfile
-from buildscripts.resmokelib.testing.hooks.background_job import _BackgroundJob, _ContinuousDynamicJSTestCase
+from buildscripts.resmokelib.testing.hooks.background_job import BackgroundRepeatedJsHook
-class AggregateResourceConsumptionMetricsInBackground(jsfile.JSHook):
+class AggregateResourceConsumptionMetricsInBackground(BackgroundRepeatedJsHook):
"""A hook to run $operationMetrics stage in the background."""
- IS_BACKGROUND = True
-
def __init__(self, hook_logger, fixture, shell_options=None):
"""Initialize AggregateResourceConsumptionMetricsInBackground."""
+
description = "Run background $operationMetrics on all mongods while a test is running"
js_filename = os.path.join("jstests", "hooks", "run_aggregate_metrics_background.js")
- jsfile.JSHook.__init__(self, hook_logger, fixture, js_filename, description,
- shell_options=shell_options)
- self._background_job = None
-
- def before_suite(self, test_report):
- """Start the background thread."""
- self._background_job = _BackgroundJob("AggregateResourceConsumptionMetricsInBackground")
- self.logger.info("Starting the background aggregate metrics thread.")
- self._background_job.start()
-
- def after_suite(self, test_report, teardown_flag=None):
- """Signal the background aggregate metrics thread to exit, and wait until it does."""
- if self._background_job is None:
- return
-
- self.logger.info("Stopping the background aggregate metrics thread.")
- self._background_job.stop()
-
- def before_test(self, test, test_report):
- """Instruct the background aggregate metrics thread to run while 'test' is also running."""
- if self._background_job is None:
- return
-
- hook_test_case = _ContinuousDynamicJSTestCase.create_before_test(
- test.logger, test, self, self._js_filename, self._shell_options)
- hook_test_case.configure(self.fixture)
-
- self.logger.info("Resuming the background aggregate metrics thread.")
- self._background_job.resume(hook_test_case, test_report)
-
- def after_test(self, test, test_report): # noqa: D205,D400
- """Instruct the background aggregate metrics thread to stop running now that 'test' has
- finished running.
- """
- if self._background_job is None:
- return
-
- self.logger.info("Pausing the background aggregate metrics thread.")
- self._background_job.pause()
-
- if self._background_job.exc_info is not None:
- if isinstance(self._background_job.exc_info[1], errors.TestFailure):
- # If the mongo shell process running the JavaScript file exited with a non-zero
- # return code, then we raise an errors.ServerFailure exception to cause resmoke.py's
- # test execution to stop.
- raise errors.ServerFailure(self._background_job.exc_info[1].args[0])
- else:
- self.logger.error(
- "Encountered an error inside the background aggregate metrics thread.",
- exc_info=self._background_job.exc_info)
- raise self._background_job.exc_info[1]
+ super().__init__(hook_logger, fixture, js_filename, description,
+ "AggregateResourceConsumptionMetricsInBackground",
+ shell_options=shell_options)
diff --git a/buildscripts/resmokelib/testing/hooks/background_job.py b/buildscripts/resmokelib/testing/hooks/background_job.py
index b01196ba462..d239df4f6eb 100644
--- a/buildscripts/resmokelib/testing/hooks/background_job.py
+++ b/buildscripts/resmokelib/testing/hooks/background_job.py
@@ -3,6 +3,7 @@
import sys
import threading
+from buildscripts.resmokelib import errors
from buildscripts.resmokelib.testing.hooks import jsfile
@@ -98,3 +99,63 @@ class _ContinuousDynamicJSTestCase(jsfile.DynamicJSTestCase):
last one. This method returns without waiting for the current execution to finish.
"""
self._should_stop.set()
+
+
+class BackgroundRepeatedJsHook(jsfile.JSHook):
+ """A hook to run a js file on repeat in the background."""
+
+ IS_BACKGROUND = True
+
+ def __init__(self, hook_logger, fixture, js_filename, description, thread_name,
+ shell_options=None):
+ """Initialize BackgroundRepeatedJsHook."""
+ super().__init__(hook_logger, fixture, js_filename, description,
+ shell_options=shell_options)
+ self._background_job = None
+ self._thread_name = thread_name
+
+ def before_suite(self, test_report):
+ """Start the background thread."""
+ self._background_job = _BackgroundJob(self._thread_name)
+ self.logger.info("Starting background thread: {}.".format(self._thread_name))
+ self._background_job.start()
+
+ def after_suite(self, test_report, teardown_flag=None):
+ """Signal the background thread to exit, and wait until it does."""
+ if self._background_job is None:
+ return
+
+ self.logger.info("Stopping background thread: {}.".format(self._thread_name))
+ self._background_job.stop()
+
+ def before_test(self, test, test_report):
+ """Instruct the background thread to run while 'test' is also running."""
+ if self._background_job is None:
+ return
+
+ hook_test_case = _ContinuousDynamicJSTestCase.create_before_test(
+ test.logger, test, self, self._js_filename, self._shell_options)
+ hook_test_case.configure(self.fixture)
+
+ self.logger.info("Resuming background thread: {}.".format(self._thread_name))
+ self._background_job.resume(hook_test_case, test_report)
+
+ def after_test(self, test, test_report): # noqa: D205,D400
+ """Instruct the background thread to stop running now that 'test' has finished running."""
+ if self._background_job is None:
+ return
+
+ self.logger.info("Pausing background thread: {}.".format(self._thread_name))
+ self._background_job.pause()
+
+ if self._background_job.exc_info is not None:
+ if isinstance(self._background_job.exc_info[1], errors.TestFailure):
+ # If the mongo shell process running the JavaScript file exited with a non-zero
+ # return code, then we raise an errors.ServerFailure exception to cause resmoke.py's
+ # test execution to stop.
+ raise errors.ServerFailure(self._background_job.exc_info[1].args[0])
+ else:
+ self.logger.error(
+ "Encountered an error inside background thread {}.".format(self._thread_name),
+ exc_info=self._background_job.exc_info)
+ raise self._background_job.exc_info[1]
diff --git a/buildscripts/resmokelib/testing/hooks/jsfile.py b/buildscripts/resmokelib/testing/hooks/jsfile.py
index 7489968d834..9a27f5298c1 100644
--- a/buildscripts/resmokelib/testing/hooks/jsfile.py
+++ b/buildscripts/resmokelib/testing/hooks/jsfile.py
@@ -1,8 +1,8 @@
"""Interface for customizing the behavior of a test fixture by executing a JavaScript file."""
from buildscripts.resmokelib import errors
-from buildscripts.resmokelib.testing.hooks import interface
from buildscripts.resmokelib.testing.fixtures.interface import MultiClusterFixture
+from buildscripts.resmokelib.testing.hooks import interface
from buildscripts.resmokelib.testing.testcases import jstest
from buildscripts.resmokelib.utils import registry
diff --git a/buildscripts/resmokelib/testing/hooks/run_telemetry.py b/buildscripts/resmokelib/testing/hooks/run_telemetry.py
new file mode 100644
index 00000000000..ac008180339
--- /dev/null
+++ b/buildscripts/resmokelib/testing/hooks/run_telemetry.py
@@ -0,0 +1,21 @@
+"""
+Test hook for verifying $telemetry collects expected metrics and can redact query shapes.
+
+This runs in the background as other tests are ongoing.
+"""
+import os.path
+
+from buildscripts.resmokelib.testing.hooks.background_job import BackgroundRepeatedJsHook
+
+
+# Running in the background will better stress concurrency control with other operations.
+class RunTelemetry(BackgroundRepeatedJsHook):
+ """Periodically runs $telemetry."""
+
+ def __init__(self, hook_logger, fixture, shell_options=None):
+ """Initialize RunTelemetry."""
+
+ description = "Read telemetry data concurrently with ongoing tests"
+ js_filename = os.path.join("jstests", "hooks", "run_telemetry.js")
+ super().__init__(hook_logger, fixture, js_filename, description, "Telemetry",
+ shell_options=shell_options)
diff --git a/jstests/hooks/run_aggregate_metrics_background.js b/jstests/hooks/run_aggregate_metrics_background.js
index 32335852fcd..6ceb04c55f5 100644
--- a/jstests/hooks/run_aggregate_metrics_background.js
+++ b/jstests/hooks/run_aggregate_metrics_background.js
@@ -88,6 +88,7 @@ const aggregateMetricsBackground = function(host) {
// which is why we sleep for 1 second here. This sleep is here rather than in
// aggregate_metrics_background.py because the background job that file uses is designed to be run
// continuously so it is easier and cleaner to just sleep here.
+// TODO SERVER-75983 move this logic into the test harness.
sleep(1000);
if (topology.type === Topology.kStandalone) {
try {
diff --git a/jstests/hooks/run_telemetry.js b/jstests/hooks/run_telemetry.js
new file mode 100644
index 00000000000..c79b39de134
--- /dev/null
+++ b/jstests/hooks/run_telemetry.js
@@ -0,0 +1,117 @@
+/**
+ * Runs the $telemetry stage and ensures that all the expected fields are present.
+ */
+
+'use strict';
+
+(function() {
+load('jstests/libs/discover_topology.js'); // For Topology and DiscoverTopology.
+
+if (typeof db === 'undefined') {
+ throw new Error(
+ "Expected mongo shell to be connected a server, but global 'db' object isn't defined");
+}
+
+// Disable implicit sessions so tests that kill random sessions won't interrupt the operations in
+// this test that aren't resilient to interruptions.
+TestData.disableImplicitSessions = true;
+
+const topology = DiscoverTopology.findConnectedNodes(db.getMongo());
+
+function runTelemetry(host) {
+ function verifyFields(doc) {
+ const kOtherTopLevelFields = [
+ "key",
+ "metrics",
+ "asOf",
+ ];
+ const kFacetedMetricFields = [
+ "docsReturned",
+ "queryExecMicros",
+ ];
+ const kSubMetricFields = [
+ "sum",
+ "min",
+ "max",
+ "sumOfSquares",
+ ];
+ const kSingletonMetricFields = [
+ "execCount",
+ "firstSeenTimestamp",
+ "lastExecutionMicros",
+ ];
+ for (let key of kOtherTopLevelFields) {
+ assert(doc.hasOwnProperty(key),
+ `The telemetry output is missing the "${key}" property: ${tojson(doc)}`);
+ }
+ const metricsDoc = doc["metrics"];
+ for (let key of kSingletonMetricFields) {
+ assert(metricsDoc.hasOwnProperty(key),
+ `The telemetry 'metrics' output is missing the "${key}" property: ${
+ tojson(metricsDoc)}`);
+ }
+ for (let facetedMetric of kFacetedMetricFields) {
+ assert(metricsDoc.hasOwnProperty(facetedMetric),
+ `The telemetry 'metrics' output is missing the "${facetedMetric}" property: ${
+ tojson(metricsDoc)}`);
+ const subMetricsDoc = metricsDoc[facetedMetric];
+ for (let subMetric of kSubMetricFields) {
+ assert(
+ subMetricsDoc.hasOwnProperty(subMetric),
+ `The telemetry 'metrics' output is missing the ${subMetric} property from the ${
+ facetedMetric} subdoc: ${tojson(subMetricsDoc)}`);
+ }
+ }
+ }
+
+ let conn = new Mongo(host);
+ conn.setSecondaryOk();
+
+ assert.neq(
+ null, conn, "Failed to connect to host '" + host + "' for background metrics collection");
+
+ let db = conn.getDB("admin");
+ function verifyResults(telemetryCursor) {
+ while (telemetryCursor.hasNext()) {
+ let doc = telemetryCursor.next();
+ try {
+ verifyFields(doc);
+ } catch (e) {
+ print(
+ "caught exception while verifying that all expected fields are in the metrics " +
+ "output: " + tojson(doc));
+ throw (e);
+ }
+ }
+ }
+ verifyResults(db.aggregate([{$telemetry: {redactIdentifiers: false}}]));
+ verifyResults(db.aggregate([{$telemetry: {redactIdentifiers: true}}]));
+}
+
+// This file is run continuously and is very fast so we want to impose some kind of rate limiting
+// which is why we sleep for 1 second here. This sleep is here rather than in run_telemetry.py
+// because the background job that file uses is designed to be run continuously so it is easier and
+// cleaner to just sleep here.
+// TODO SERVER-75983 move this logic into the test harness.
+sleep(1000);
+if (topology.type === Topology.kStandalone) {
+ try {
+ runTelemetry(topology.mongod);
+ } catch (e) {
+ print("background aggregate metrics against the standalone failed");
+ throw e;
+ }
+} else if (topology.type === Topology.kReplicaSet) {
+ for (let replicaMember of topology.nodes) {
+ try {
+ runTelemetry(replicaMember);
+ } catch (e) {
+ print("background aggregate metrics was not successful against all replica set " +
+ "members");
+ throw e;
+ }
+ }
+} else {
+ throw new Error("Unsupported topology configuration: " + tojson(topology));
+}
+})();
diff --git a/src/mongo/db/matcher/schema/expression_internal_schema_allowed_properties.cpp b/src/mongo/db/matcher/schema/expression_internal_schema_allowed_properties.cpp
index c9a76a5d61f..30d42699589 100644
--- a/src/mongo/db/matcher/schema/expression_internal_schema_allowed_properties.cpp
+++ b/src/mongo/db/matcher/schema/expression_internal_schema_allowed_properties.cpp
@@ -140,7 +140,7 @@ void InternalSchemaAllowedPropertiesMatchExpression::serialize(BSONObjBuilder* b
for (auto&& item : _patternProperties) {
BSONObjBuilder itemBuilder(patternPropertiesBuilder.subobjStart());
if (opts.replacementForLiteralArgs) {
- itemBuilder.appendRegex("regex", opts.replacementForLiteralArgs.get());
+ itemBuilder.append("regex", opts.replacementForLiteralArgs.get());
} else {
itemBuilder.appendRegex("regex", item.first.rawRegex);
}
diff --git a/src/mongo/db/pipeline/document_source_telemetry.cpp b/src/mongo/db/pipeline/document_source_telemetry.cpp
index 4d81065855f..db8ee94cd21 100644
--- a/src/mongo/db/pipeline/document_source_telemetry.cpp
+++ b/src/mongo/db/pipeline/document_source_telemetry.cpp
@@ -144,14 +144,26 @@ DocumentSource::GetNextResult DocumentSourceTelemetry::doGetNext() {
const auto partitionReadTime =
Timestamp{Timestamp(Date_t::now().toMillisSinceEpoch() / 1000, 0)};
for (auto&& [key, metrics] : *partition) {
- auto swKey = metrics->redactKey(key, _redactIdentifiers, pExpCtx->opCtx);
+ StatusWith<BSONObj> swKey = Status{ErrorCodes::InternalError, "Uninitialized"};
+ try {
+ swKey = metrics->redactKey(key, _redactIdentifiers, pExpCtx->opCtx);
+ } catch (const DBException& ex) {
+ swKey = ex.toStatus();
+ }
if (!swKey.isOK()) {
+ // We would like to print the whole key unredacted to debug, but of course the point
+ // of redacting it is to not leak PII, so we will use this raw/naive redaction
+ // algorithm which may cut things short. This will only do anything if log redaction
+ // is enabled.
LOGV2_DEBUG(7349403,
- 3,
+ 1,
"Error encountered when redacting query shape, will not publish "
"telemetry for this entry.",
- "status"_attr = swKey.getStatus());
+ "status"_attr = swKey.getStatus(),
+ "rawKey"_attr = redact(key));
if (kDebugBuild) {
+ LOGV2_DEBUG(
+ 7559600, 0, "Failed to redact telemetry", "rawKey"_attr = redact(key));
tasserted(7349401,
"Was not able to re-parse telemetry key when reading telemetry.");
}
diff --git a/src/mongo/db/pipeline/field_path.cpp b/src/mongo/db/pipeline/field_path.cpp
index 9a0a53f61fc..525b81a311a 100644
--- a/src/mongo/db/pipeline/field_path.cpp
+++ b/src/mongo/db/pipeline/field_path.cpp
@@ -109,8 +109,8 @@ void FieldPath::uassertValidFieldName(StringData fieldName) {
if (fieldName[0] == '$' && !kAllowedDollarPrefixedFields.count(fieldName)) {
uasserted(16410,
- str::stream() << "FieldPath field names may not start with '$'."
- << dotsAndDollarsHint);
+ str::stream() << "FieldPath field names may not start with '$'. Got '"
+ << fieldName << "'. " << dotsAndDollarsHint);
}
uassert(
diff --git a/src/mongo/db/query/telemetry.cpp b/src/mongo/db/query/telemetry.cpp
index a338942d4d8..05a205f8910 100644
--- a/src/mongo/db/query/telemetry.cpp
+++ b/src/mongo/db/query/telemetry.cpp
@@ -160,6 +160,12 @@ BSONObj redactHintComponent(BSONObj obj, const SerializationOptions& opts, bool
continue;
}
+ // $natural doesn't need to be redacted.
+ if (elem.fieldNameStringData().compare(query_request_helper::kNaturalSortField) == 0) {
+ bob.append(elem);
+ continue;
+ }
+
if (opts.replacementForLiteralArgs && redactValues) {
bob.append(opts.serializeFieldPathFromString(elem.fieldName()),
opts.replacementForLiteralArgs.get());