diff options
author | Brian DeLeonardis <brian.deleonardis@mongodb.com> | 2020-11-11 22:34:22 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-11-19 22:39:08 +0000 |
commit | 5247cf436984268be6231d90c6d140259d43b83f (patch) | |
tree | f28e5efc0964078af7ac811d6735ee4bba279aba | |
parent | 77554e9e4fd18811d6df84d8934c888814d034ec (diff) | |
download | mongo-5247cf436984268be6231d90c6d140259d43b83f.tar.gz |
SERVER-51403 Create concurrency suite with background operation metrics collection
5 files changed, 275 insertions, 0 deletions
diff --git a/buildscripts/resmokeconfig/suites/concurrency_metrics.yml b/buildscripts/resmokeconfig/suites/concurrency_metrics.yml new file mode 100644 index 00000000000..1ad332c0300 --- /dev/null +++ b/buildscripts/resmokeconfig/suites/concurrency_metrics.yml @@ -0,0 +1,28 @@ +test_kind: fsm_workload_test + +selector: + roots: + - jstests/concurrency/fsm_workloads/**/*.js + exclude_with_any_tags: + - uses_transactions + - requires_replication + - requires_sharding + +executor: + archive: + hooks: + - AggregateResourceConsumptionMetricsInBackground + tests: true + config: + shell_options: + readMode: commands + hooks: + - class: AggregateResourceConsumptionMetricsInBackground + - class: CleanupConcurrencyWorkloads + fixture: + class: MongoDFixture + mongod_options: + set_parameters: + enableTestCommands: 1 + measureOperationResourceConsumption: true + aggregateOperationResourceConsumptionMetrics: true diff --git a/buildscripts/resmokeconfig/suites/concurrency_replication_metrics.yml b/buildscripts/resmokeconfig/suites/concurrency_replication_metrics.yml new file mode 100644 index 00000000000..83eb39a99e9 --- /dev/null +++ b/buildscripts/resmokeconfig/suites/concurrency_replication_metrics.yml @@ -0,0 +1,43 @@ +test_kind: fsm_workload_test + +selector: + roots: + - jstests/concurrency/fsm_workloads/**/*.js + exclude_files: + ## + # Disabled due to MongoDB restrictions and/or workload restrictions + ## + # These workloads use >100MB of data, which can overwhelm test hosts. + - jstests/concurrency/fsm_workloads/agg_group_external.js + - jstests/concurrency/fsm_workloads/agg_sort_external.js + + # The findAndModify_update_grow.js workload can cause OOM kills on test hosts. + - jstests/concurrency/fsm_workloads/findAndModify_update_grow.js + + # These workloads run the reIndex command, which is only allowed on a standalone node. + - jstests/concurrency/fsm_workloads/reindex.js + - jstests/concurrency/fsm_workloads/reindex_background.js + - jstests/concurrency/fsm_workloads/reindex_writeconflict.js + + exclude_with_any_tags: + - requires_sharding + +executor: + archive: + hooks: + - AggregateResourceConsumptionMetricsInBackground + tests: true + config: + shell_options: + readMode: commands + hooks: + - class: AggregateResourceConsumptionMetricsInBackground + - class: CleanupConcurrencyWorkloads + fixture: + class: ReplicaSetFixture + mongod_options: + set_parameters: + enableTestCommands: 1 + measureOperationResourceConsumption: true + aggregateOperationResourceConsumptionMetrics: true + num_nodes: 3 diff --git a/buildscripts/resmokelib/testing/hooks/aggregate_metrics_background.py b/buildscripts/resmokelib/testing/hooks/aggregate_metrics_background.py new file mode 100644 index 00000000000..ad66d1731a6 --- /dev/null +++ b/buildscripts/resmokelib/testing/hooks/aggregate_metrics_background.py @@ -0,0 +1,71 @@ +"""Test hook for running the $operationMetrics stage in the background. + +This hook runs continuously, but the run_aggregate_metrics_background.js file it runs will +internally sleep for 1 second between runs. +""" + +import os.path + +from buildscripts.resmokelib import errors +from buildscripts.resmokelib.testing.hooks import jsfile +from buildscripts.resmokelib.testing.hooks.background_job import _BackgroundJob, _ContinuousDynamicJSTestCase + + +class AggregateResourceConsumptionMetricsInBackground(jsfile.JSHook): + """A hook to run $operationMetrics stage in the background.""" + + def __init__(self, hook_logger, fixture, shell_options=None): + """Initialize AggregateResourceConsumptionMetricsInBackground.""" + description = "Run background $operationMetrics on all mongods while a test is running" + js_filename = os.path.join("jstests", "hooks", "run_aggregate_metrics_background.js") + jsfile.JSHook.__init__(self, hook_logger, fixture, js_filename, description, + shell_options=shell_options) + self._background_job = None + + def before_suite(self, test_report): + """Start the background thread.""" + self._background_job = _BackgroundJob("AggregateResourceConsumptionMetricsInBackground") + self.logger.info("Starting the background aggregate metrics thread.") + self._background_job.start() + + def after_suite(self, test_report): + """Signal the background aggregate metrics thread to exit, and wait until it does.""" + if self._background_job is None: + return + + self.logger.info("Stopping the background aggregate metrics thread.") + self._background_job.stop() + + def before_test(self, test, test_report): + """Instruct the background aggregate metrics thread to run while 'test' is also running.""" + if self._background_job is None: + return + + hook_test_case = _ContinuousDynamicJSTestCase.create_before_test( + self.logger, test, self, self._js_filename, self._shell_options) + hook_test_case.configure(self.fixture) + + self.logger.info("Resuming the background aggregate metrics thread.") + self._background_job.resume(hook_test_case, test_report) + + def after_test(self, test, test_report): # noqa: D205,D400 + """Instruct the background aggregate metrics thread to stop running now that 'test' has + finished running. + """ + if self._background_job is None: + return + + self.logger.info("Pausing the background aggregate metrics thread.") + self._background_job.pause() + + if self._background_job.exc_info is not None: + if isinstance(self._background_job.exc_info[1], errors.TestFailure): + # If the mongo shell process running the JavaScript file exited with a non-zero + # return code, then we raise an errors.ServerFailure exception to cause resmoke.py's + # test execution to stop. + raise errors.ServerFailure(self._background_job.exc_info[1].args[0]) + else: + self.logger.error( + "Encountered an error inside the background aggregate metrics thread.", + exc_info=self._background_job.exc_info) + raise self._background_job.exc_info[1] diff --git a/etc/evergreen.yml b/etc/evergreen.yml index fd93bcde9bf..d826dfd4ef8 100644 --- a/etc/evergreen.yml +++ b/etc/evergreen.yml @@ -6491,6 +6491,24 @@ tasks: resmoke_args: --suites=concurrency --storageEngine=wiredTiger resmoke_jobs_max: 1 +- <<: *task_template + name: concurrency_metrics + commands: + - func: "do setup" + - func: "run tests" + vars: + resmoke_args: --suites=concurrency_metrics --storageEngine=wiredTiger + resmoke_jobs_max: 1 + +- <<: *task_template + name: concurrency_replication_metrics + commands: + - func: "do setup" + - func: "run tests" + vars: + resmoke_args: --suites=concurrency_replication_metrics --storageEngine=wiredTiger + resmoke_jobs_max: 1 + - name: concurrency_replication_gen tags: ["concurrency", "common", "repl"] commands: @@ -11009,6 +11027,10 @@ buildvariants: - name: compile_all_run_unittests_TG distros: - rhel62-large + - name: concurrency_metrics + - name: concurrency_replication_metrics + distros: + - rhel62-large - name: jsCore - name: noPassthrough_gen - name: noPassthroughWithMongod_gen diff --git a/jstests/hooks/run_aggregate_metrics_background.js b/jstests/hooks/run_aggregate_metrics_background.js new file mode 100644 index 00000000000..e6d53b6a6f0 --- /dev/null +++ b/jstests/hooks/run_aggregate_metrics_background.js @@ -0,0 +1,111 @@ +/** + * Runs the $operationMetrics stage and ensures that all the expected fields are present. + */ + +'use strict'; + +(function() { +load('jstests/libs/discover_topology.js'); // For Topology and DiscoverTopology. + +if (typeof db === 'undefined') { + throw new Error( + "Expected mongo shell to be connected a server, but global 'db' object isn't defined"); +} + +// Disable implicit sessions so FSM workloads that kill random sessions won't interrupt the +// operations in this test that aren't resilient to interruptions. +TestData.disableImplicitSessions = true; + +const topology = DiscoverTopology.findConnectedNodes(db.getMongo()); + +const aggregateMetricsBackground = function(host) { + function verifyFields(doc) { + const kTopLevelFields = [ + "docBytesWritten", + "docUnitsWritten", + "idxEntryBytesWritten", + "idxEntryUnitsWritten", + "cpuNanos", + "db", + "primaryMetrics", + "secondaryMetrics" + ]; + const kReadFields = [ + "docBytesRead", + "docUnitsRead", + "idxEntryBytesRead", + "idxEntryUnitsRead", + "keysSorted", + "docUnitsReturned" + ]; + + for (let key of kTopLevelFields) { + assert(doc.hasOwnProperty(key), "The metrics output is missing the property: " + key); + } + let primaryMetrics = doc.primaryMetrics; + for (let key of kReadFields) { + assert(primaryMetrics.hasOwnProperty(key), + "The metrics output is missing the property: primaryMetrics." + key); + } + let secondaryMetrics = doc.secondaryMetrics; + for (let key of kReadFields) { + assert(secondaryMetrics.hasOwnProperty(key), + "The metrics output is missing the property: secondaryMetrics." + key); + } + } + + let conn = new Mongo(host); + conn.setSecondaryOk(); + + assert.neq( + null, conn, "Failed to connect to host '" + host + "' for background metrics collection"); + + // Filter out arbiters. + if (conn.adminCommand({isMaster: 1}).arbiterOnly) { + print("Skipping background aggregation against test node: " + host + + " because it is an arbiter and has no data."); + return; + } + + let db = conn.getDB("admin"); + let clearMetrics = Math.random() < 0.9 ? false : true; + print("Running $operationMetrics with {clearMetrics: " + clearMetrics + "} on host: " + host); + const cursor = db.aggregate([{$operationMetrics: {clearMetrics: clearMetrics}}]); + while (cursor.hasNext()) { + let doc = cursor.next(); + try { + verifyFields(doc); + } catch (e) { + print("caught exception while verifying that all expected fields are in the metrics " + + "output: " + tojson(doc)); + throw (e); + } + } +}; + +// This file is run continuously and is very fast so we want to impose some kind of rate limiting +// which is why we sleep for 1 second here. This sleep is here rather than in +// aggregate_metrics_background.py because the background job that file uses is designed to be run +// continuously so it is easier and cleaner to just sleep here. +sleep(1000); +if (topology.type === Topology.kStandalone) { + try { + aggregateMetricsBackground(topology.mongod); + } catch (e) { + print("background aggregate metrics against the standalone failed"); + throw e; + } +} else if (topology.type === Topology.kReplicaSet) { + for (let replicaMember of topology.nodes) { + try { + aggregateMetricsBackground(replicaMember); + } catch (e) { + print("background aggregate metrics was not successful against all replica set " + + "members"); + throw e; + } + } +} else { + throw new Error("Unsupported topology configuration: " + tojson(topology)); +} +})(); |