summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrian DeLeonardis <brian.deleonardis@mongodb.com>2020-11-11 22:34:22 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-11-19 22:39:08 +0000
commit5247cf436984268be6231d90c6d140259d43b83f (patch)
treef28e5efc0964078af7ac811d6735ee4bba279aba
parent77554e9e4fd18811d6df84d8934c888814d034ec (diff)
downloadmongo-5247cf436984268be6231d90c6d140259d43b83f.tar.gz
SERVER-51403 Create concurrency suite with background operation metrics collection
-rw-r--r--buildscripts/resmokeconfig/suites/concurrency_metrics.yml28
-rw-r--r--buildscripts/resmokeconfig/suites/concurrency_replication_metrics.yml43
-rw-r--r--buildscripts/resmokelib/testing/hooks/aggregate_metrics_background.py71
-rw-r--r--etc/evergreen.yml22
-rw-r--r--jstests/hooks/run_aggregate_metrics_background.js111
5 files changed, 275 insertions, 0 deletions
diff --git a/buildscripts/resmokeconfig/suites/concurrency_metrics.yml b/buildscripts/resmokeconfig/suites/concurrency_metrics.yml
new file mode 100644
index 00000000000..1ad332c0300
--- /dev/null
+++ b/buildscripts/resmokeconfig/suites/concurrency_metrics.yml
@@ -0,0 +1,28 @@
+# Runs the FSM concurrency workloads against a MongoDFixture while the
+# AggregateResourceConsumptionMetricsInBackground hook runs alongside each test.
+test_kind: fsm_workload_test
+
+selector:
+  roots:
+  - jstests/concurrency/fsm_workloads/**/*.js
+  exclude_with_any_tags:
+  - uses_transactions
+  - requires_replication
+  - requires_sharding
+
+executor:
+  archive:
+    hooks:
+      - AggregateResourceConsumptionMetricsInBackground
+    tests: true
+  config:
+    shell_options:
+      readMode: commands
+  hooks:
+  - class: AggregateResourceConsumptionMetricsInBackground
+  - class: CleanupConcurrencyWorkloads
+  fixture:
+    class: MongoDFixture
+    mongod_options:
+      set_parameters:
+        enableTestCommands: 1
+        # Both parameters are enabled so the background hook has metrics to aggregate.
+        measureOperationResourceConsumption: true
+        aggregateOperationResourceConsumptionMetrics: true
diff --git a/buildscripts/resmokeconfig/suites/concurrency_replication_metrics.yml b/buildscripts/resmokeconfig/suites/concurrency_replication_metrics.yml
new file mode 100644
index 00000000000..83eb39a99e9
--- /dev/null
+++ b/buildscripts/resmokeconfig/suites/concurrency_replication_metrics.yml
@@ -0,0 +1,43 @@
+# Runs the FSM concurrency workloads against a 3-node ReplicaSetFixture while the
+# AggregateResourceConsumptionMetricsInBackground hook runs alongside each test.
+test_kind: fsm_workload_test
+
+selector:
+  roots:
+  - jstests/concurrency/fsm_workloads/**/*.js
+  exclude_files:
+  ##
+  # Disabled due to MongoDB restrictions and/or workload restrictions
+  ##
+  # These workloads use >100MB of data, which can overwhelm test hosts.
+  - jstests/concurrency/fsm_workloads/agg_group_external.js
+  - jstests/concurrency/fsm_workloads/agg_sort_external.js
+
+  # The findAndModify_update_grow.js workload can cause OOM kills on test hosts.
+  - jstests/concurrency/fsm_workloads/findAndModify_update_grow.js
+
+  # These workloads run the reIndex command, which is only allowed on a standalone node.
+  - jstests/concurrency/fsm_workloads/reindex.js
+  - jstests/concurrency/fsm_workloads/reindex_background.js
+  - jstests/concurrency/fsm_workloads/reindex_writeconflict.js
+
+  exclude_with_any_tags:
+  - requires_sharding
+
+executor:
+  archive:
+    hooks:
+      - AggregateResourceConsumptionMetricsInBackground
+    tests: true
+  config:
+    shell_options:
+      readMode: commands
+  hooks:
+  - class: AggregateResourceConsumptionMetricsInBackground
+  - class: CleanupConcurrencyWorkloads
+  fixture:
+    class: ReplicaSetFixture
+    mongod_options:
+      set_parameters:
+        enableTestCommands: 1
+        # Both parameters are enabled so the background hook has metrics to aggregate.
+        measureOperationResourceConsumption: true
+        aggregateOperationResourceConsumptionMetrics: true
+    num_nodes: 3
diff --git a/buildscripts/resmokelib/testing/hooks/aggregate_metrics_background.py b/buildscripts/resmokelib/testing/hooks/aggregate_metrics_background.py
new file mode 100644
index 00000000000..ad66d1731a6
--- /dev/null
+++ b/buildscripts/resmokelib/testing/hooks/aggregate_metrics_background.py
@@ -0,0 +1,71 @@
+"""Test hook for running the $operationMetrics stage in the background.
+
+This hook runs continuously, but the run_aggregate_metrics_background.js file it runs will
+internally sleep for 1 second between runs.
+"""
+
+import os.path
+
+from buildscripts.resmokelib import errors
+from buildscripts.resmokelib.testing.hooks import jsfile
+from buildscripts.resmokelib.testing.hooks.background_job import _BackgroundJob, _ContinuousDynamicJSTestCase
+
+
+class AggregateResourceConsumptionMetricsInBackground(jsfile.JSHook):
+    """A hook to run $operationMetrics stage in the background.
+
+    A single background job is started before the suite and stopped after it. Around every test
+    the job is resumed with a fresh dynamic test case that runs
+    jstests/hooks/run_aggregate_metrics_background.js, and paused again once the test finishes.
+    """
+
+    def __init__(self, hook_logger, fixture, shell_options=None):
+        """Initialize AggregateResourceConsumptionMetricsInBackground."""
+        description = "Run background $operationMetrics on all mongods while a test is running"
+        js_filename = os.path.join("jstests", "hooks", "run_aggregate_metrics_background.js")
+        jsfile.JSHook.__init__(self, hook_logger, fixture, js_filename, description,
+                               shell_options=shell_options)
+        # Created in before_suite(); the per-test callbacks are no-ops while this is None.
+        self._background_job = None
+
+    def before_suite(self, test_report):
+        """Start the background thread."""
+        self._background_job = _BackgroundJob("AggregateResourceConsumptionMetricsInBackground")
+        self.logger.info("Starting the background aggregate metrics thread.")
+        self._background_job.start()
+
+    def after_suite(self, test_report):
+        """Signal the background aggregate metrics thread to exit, and wait until it does."""
+        if self._background_job is None:
+            return
+
+        self.logger.info("Stopping the background aggregate metrics thread.")
+        self._background_job.stop()
+
+    def before_test(self, test, test_report):
+        """Instruct the background aggregate metrics thread to run while 'test' is also running."""
+        if self._background_job is None:
+            return
+
+        hook_test_case = _ContinuousDynamicJSTestCase.create_before_test(
+            self.logger, test, self, self._js_filename, self._shell_options)
+        hook_test_case.configure(self.fixture)
+
+        self.logger.info("Resuming the background aggregate metrics thread.")
+        self._background_job.resume(hook_test_case, test_report)
+
+    def after_test(self, test, test_report):
+        """Pause the background aggregate metrics thread now that 'test' has finished running.
+
+        Any exception recorded by the background job is re-raised here: an errors.TestFailure
+        is converted into an errors.ServerFailure, anything else is logged and raised as-is.
+        """
+        if self._background_job is None:
+            return
+
+        self.logger.info("Pausing the background aggregate metrics thread.")
+        self._background_job.pause()
+
+        if self._background_job.exc_info is not None:
+            if isinstance(self._background_job.exc_info[1], errors.TestFailure):
+                # If the mongo shell process running the JavaScript file exited with a non-zero
+                # return code, then we raise an errors.ServerFailure exception to cause resmoke.py's
+                # test execution to stop.
+                raise errors.ServerFailure(self._background_job.exc_info[1].args[0])
+            else:
+                self.logger.error(
+                    "Encountered an error inside the background aggregate metrics thread.",
+                    exc_info=self._background_job.exc_info)
+                raise self._background_job.exc_info[1]
diff --git a/etc/evergreen.yml b/etc/evergreen.yml
index fd93bcde9bf..d826dfd4ef8 100644
--- a/etc/evergreen.yml
+++ b/etc/evergreen.yml
@@ -6491,6 +6491,24 @@ tasks:
resmoke_args: --suites=concurrency --storageEngine=wiredTiger
resmoke_jobs_max: 1
+# Runs the concurrency_metrics resmoke suite with a single resmoke job.
+- <<: *task_template
+  name: concurrency_metrics
+  commands:
+  - func: "do setup"
+  - func: "run tests"
+    vars:
+      resmoke_args: --suites=concurrency_metrics --storageEngine=wiredTiger
+      resmoke_jobs_max: 1
+
+# Runs the concurrency_replication_metrics resmoke suite with a single resmoke job.
+- <<: *task_template
+  name: concurrency_replication_metrics
+  commands:
+  - func: "do setup"
+  - func: "run tests"
+    vars:
+      resmoke_args: --suites=concurrency_replication_metrics --storageEngine=wiredTiger
+      resmoke_jobs_max: 1
+
- name: concurrency_replication_gen
tags: ["concurrency", "common", "repl"]
commands:
@@ -11009,6 +11027,10 @@ buildvariants:
- name: compile_all_run_unittests_TG
distros:
- rhel62-large
+ - name: concurrency_metrics
+ - name: concurrency_replication_metrics
+ distros:
+ - rhel62-large
- name: jsCore
- name: noPassthrough_gen
- name: noPassthroughWithMongod_gen
diff --git a/jstests/hooks/run_aggregate_metrics_background.js b/jstests/hooks/run_aggregate_metrics_background.js
new file mode 100644
index 00000000000..e6d53b6a6f0
--- /dev/null
+++ b/jstests/hooks/run_aggregate_metrics_background.js
@@ -0,0 +1,111 @@
+/**
+ * Runs the $operationMetrics stage and ensures that all the expected fields are present.
+ */
+
+'use strict';
+
+(function() {
+load('jstests/libs/discover_topology.js'); // For Topology and DiscoverTopology.
+
+// Topology discovery below starts from the shell's global 'db', so fail fast when the shell was
+// started without a connection.
+if (typeof db === 'undefined') {
+    throw new Error(
+        "Expected mongo shell to be connected to a server, but global 'db' object isn't defined");
+}
+
+// Disable implicit sessions so FSM workloads that kill random sessions won't interrupt the
+// operations in this test that aren't resilient to interruptions.
+TestData.disableImplicitSessions = true;
+
+const topology = DiscoverTopology.findConnectedNodes(db.getMongo());
+
+/**
+ * Opens a new connection to 'host', runs the $operationMetrics aggregation stage against its
+ * "admin" database and asserts that every returned document contains the expected fields.
+ *
+ * Arbiters are skipped. About one run in ten passes {clearMetrics: true} to the stage.
+ *
+ * @param host - the node to connect to; passed unchanged to 'new Mongo()'.
+ * @throws if the connection cannot be used, the aggregation fails, or a document is missing an
+ *     expected field.
+ */
+const aggregateMetricsBackground = function(host) {
+    // Asserts that 'doc' has every top-level field and that both its primaryMetrics and
+    // secondaryMetrics sub-documents have every read-side field.
+    function verifyFields(doc) {
+        const kTopLevelFields = [
+            "docBytesWritten",
+            "docUnitsWritten",
+            "idxEntryBytesWritten",
+            "idxEntryUnitsWritten",
+            "cpuNanos",
+            "db",
+            "primaryMetrics",
+            "secondaryMetrics"
+        ];
+        const kReadFields = [
+            "docBytesRead",
+            "docUnitsRead",
+            "idxEntryBytesRead",
+            "idxEntryUnitsRead",
+            "keysSorted",
+            "docUnitsReturned"
+        ];
+
+        for (const key of kTopLevelFields) {
+            assert(doc.hasOwnProperty(key), "The metrics output is missing the property: " + key);
+        }
+        // Both sub-documents are checked against the same list of read-side fields.
+        for (const section of ["primaryMetrics", "secondaryMetrics"]) {
+            const metrics = doc[section];
+            for (const key of kReadFields) {
+                assert(metrics.hasOwnProperty(key),
+                       "The metrics output is missing the property: " + section + "." + key);
+            }
+        }
+    }
+
+    const conn = new Mongo(host);
+    // Check the connection before its first use rather than after it.
+    assert.neq(
+        null, conn, "Failed to connect to host '" + host + "' for background metrics collection");
+    conn.setSecondaryOk();
+
+    // Filter out arbiters.
+    if (conn.adminCommand({isMaster: 1}).arbiterOnly) {
+        print("Skipping background aggregation against test node: " + host +
+              " because it is an arbiter and has no data.");
+        return;
+    }
+
+    // Named 'adminDB' so the shell's global 'db' is not shadowed.
+    const adminDB = conn.getDB("admin");
+    // True for roughly 10% of runs.
+    const clearMetrics = Math.random() >= 0.9;
+    print("Running $operationMetrics with {clearMetrics: " + clearMetrics + "} on host: " + host);
+    const cursor = adminDB.aggregate([{$operationMetrics: {clearMetrics: clearMetrics}}]);
+    while (cursor.hasNext()) {
+        const doc = cursor.next();
+        try {
+            verifyFields(doc);
+        } catch (e) {
+            // Log the offending document before propagating the original failure.
+            print("caught exception while verifying that all expected fields are in the metrics " +
+                  "output: " + tojson(doc));
+            throw e;
+        }
+    }
+};
+
+// Rate limiting: this script finishes quickly and is re-run continuously, so pause for one second
+// on every run. The pause lives here rather than in aggregate_metrics_background.py because the
+// background job that file uses is designed to be run continuously.
+sleep(1000);
+switch (topology.type) {
+    case Topology.kStandalone:
+        try {
+            aggregateMetricsBackground(topology.mongod);
+        } catch (err) {
+            print("background aggregate metrics against the standalone failed");
+            throw err;
+        }
+        break;
+    case Topology.kReplicaSet:
+        // Visit the members in order; the first failure aborts the remaining ones.
+        for (let node of topology.nodes) {
+            try {
+                aggregateMetricsBackground(node);
+            } catch (err) {
+                print("background aggregate metrics was not successful against all replica set " +
+                      "members");
+                throw err;
+            }
+        }
+        break;
+    default:
+        throw new Error("Unsupported topology configuration: " + tojson(topology));
+}
+})();