summaryrefslogtreecommitdiff
path: root/buildscripts
diff options
context:
space:
mode:
Diffstat (limited to 'buildscripts')
-rw-r--r--buildscripts/resmokeconfig/suites/replica_sets_jscore_passthrough.yml24
-rw-r--r--buildscripts/resmokelib/testing/hooks/dbhash_background.py175
-rw-r--r--buildscripts/resmokelib/testing/hooks/interface.py2
-rw-r--r--buildscripts/resmokelib/testing/job.py7
-rw-r--r--buildscripts/resmokelib/testing/testcases/jstest.py2
5 files changed, 207 insertions, 3 deletions
diff --git a/buildscripts/resmokeconfig/suites/replica_sets_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/replica_sets_jscore_passthrough.yml
index 189a1655f63..d7222328cbe 100644
--- a/buildscripts/resmokeconfig/suites/replica_sets_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/replica_sets_jscore_passthrough.yml
@@ -9,11 +9,27 @@ selector:
- jstests/core/opcounters_write_cmd.js
- jstests/core/read_after_optime.js
- jstests/core/capped_update.js
+ # These tests change the featureCompatibilityVersion which makes it so transactions aren't
+ # supported.
+ - jstests/core/txns/only_allow_transactions_on_FCV_40.js
+ - jstests/core/txns/abort_transactions_on_FCV_downgrade.js
+ # These tests change the transactionLifetimeLimitSeconds server parameter which conflicts with how
+ # the CheckReplDBHashInBackground hook doesn't want transactions to be reaped while it is running.
+ - jstests/core/txns/abort_expired_transaction.js
+ - jstests/core/txns/abort_transaction_thread_does_not_block_on_locks.js
+ # The set_param1.js test attempts to compare the response from running the {getParameter: "*"}
+ # command multiple times, which may observe the change to the "transactionLifetimeLimitSeconds"
+ # server parameter.
+ - jstests/core/set_param1.js
+ # TODO SERVER-35217: Attempting to kill a session while multiple transactions have been started on
+ # the server may lead to deadlock.
+ - jstests/core/txns/kill_sessions_kills_transaction.js
executor:
archive:
hooks:
- CheckReplDBHash
+ - CheckReplDBHashInBackground
- CheckReplOplogs
- ValidateCollections
config:
@@ -24,6 +40,7 @@ executor:
# The CheckReplDBHash hook waits until all operations have replicated to and have been applied
# on the secondaries, so we run the ValidateCollections hook after it to ensure we're
# validating the entire contents of the collection.
+ - class: CheckReplDBHashInBackground
- class: CheckReplOplogs
- class: CheckReplDBHash
- class: ValidateCollections
@@ -35,4 +52,11 @@ executor:
set_parameters:
enableTestCommands: 1
numInitialSyncAttempts: 1
+ # When running tests that intentionally trigger a DuplicateKeyError, we somehow end up
+ # performing an atClusterTime read in the CheckReplDBHashInBackground hook based on an
+ # operationTime that is greater than anything in the oplog. The periodic no-op writer must
+ # be enabled to ensure that we eventually reach the clusterTime we are waiting for.
+ #
+ # TODO SERVER-35156: Remove the following line to disable the periodic no-op writer.
+ writePeriodicNoops: 1
num_nodes: 2
diff --git a/buildscripts/resmokelib/testing/hooks/dbhash_background.py b/buildscripts/resmokelib/testing/hooks/dbhash_background.py
new file mode 100644
index 00000000000..10d49ee8e3f
--- /dev/null
+++ b/buildscripts/resmokelib/testing/hooks/dbhash_background.py
@@ -0,0 +1,175 @@
+"""Test hook for verifying data consistency across a replica set.
+
+Unlike dbhash.py, this version of the hook runs continously in a background thread while the test is
+running.
+"""
+
+from __future__ import absolute_import
+
+import os.path
+import sys
+import threading
+
+from buildscripts.resmokelib import errors
+from buildscripts.resmokelib.testing.hooks import jsfile
+from buildscripts.resmokelib.testing.testcases import interface as testcase
+
+
+class CheckReplDBHashInBackground(jsfile.JSHook):
+ """A hook for comparing the dbhashes of all replica set members while a test is running."""
+
+ def __init__(self, hook_logger, fixture, shell_options=None):
+ """Initialize CheckReplDBHashInBackground."""
+ description = "Check dbhashes of all replica set members while a test is running"
+ js_filename = os.path.join("jstests", "hooks", "run_check_repl_dbhash_background.js")
+ jsfile.JSHook.__init__(self, hook_logger, fixture, js_filename, description,
+ shell_options=shell_options)
+
+ self._background_job = None
+
+ def before_suite(self, test_report):
+ """Start the background thread."""
+ client = self.fixture.mongo_client()
+ server_status = client.admin.command("serverStatus")
+ if not server_status["storageEngine"].get("supportsSnapshotReadConcern", False):
+ self.logger.info("Not enabling the background thread because '%s' storage engine"
+ " doesn't support snapshot reads.",
+ server_status["storageEngine"]["name"])
+ return
+
+ self._background_job = _BackgroundJob()
+ self.logger.info("Starting the background thread.")
+ self._background_job.start()
+
+ def after_suite(self, test_report):
+ """Signal the background thread to exit, and wait until it does."""
+ if self._background_job is None:
+ return
+
+ self.logger.info("Stopping the background thread.")
+ self._background_job.stop()
+
+ def before_test(self, test, test_report):
+ """Instruct the background thread to run the dbhash check while 'test' is also running."""
+ if self._background_job is None:
+ return
+
+ hook_test_case = _ContinuousDynamicJSTestCase.create_before_test(
+ self.logger.test_case_logger, test, self, self._js_filename, self._shell_options)
+ hook_test_case.configure(self.fixture)
+
+ self.logger.info("Resuming the background thread.")
+ self._background_job.resume(hook_test_case, test_report)
+
+ def after_test(self, test, test_report): # noqa: D205,D400
+ """Instruct the background thread to stop running the dbhash check now that 'test' has
+ finished running.
+ """
+ if self._background_job is None:
+ return
+
+ self.logger.info("Pausing the background thread.")
+ self._background_job.pause()
+
+ if self._background_job.exc_info is not None:
+ if isinstance(self._background_job.exc_info[1], errors.TestFailure):
+ # If the mongo shell process running the JavaScript file exited with a non-zero
+ # return code, then we raise an errors.ServerFailure exception to cause resmoke.py's
+ # test execution to stop.
+ raise errors.ServerFailure(self._background_job.exc_info[1].args[0])
+ else:
+ self.logger.error("Encountered an error inside the background thread.",
+ exc_info=self._background_job.exc_info)
+ raise self._background_job.exc_info[1]
+
+
+class _BackgroundJob(threading.Thread): # pylint: disable=too-many-instance-attributes
+ """A thread for running the dbhash check while a test is running."""
+
+ def __init__(self):
+ """Initialize _BackgroundJob."""
+ threading.Thread.__init__(self, name="CheckReplDBHashInBackground")
+ self.daemon = True
+
+ self._lock = threading.Lock()
+ self._cond = threading.Condition(self._lock)
+
+ self._should_stop = False
+ self._should_resume = False
+ self._is_idle = True
+
+ self._hook_test_case = None
+ self._test_report = None
+ self.exc_info = None
+
+ def run(self):
+ while True:
+ with self._lock:
+ while not self._should_resume:
+ self._cond.wait()
+
+ # We have the background thread set 'self._should_resume' back to false to ensure
+ # that 'self._hook_test_case.run_dynamic_test()' is only called once before the
+ # resume() method is called again.
+ self._should_resume = False
+
+ if self._should_stop:
+ break
+
+ # We are about to execute 'self._hook_test_case' so we mark the background thread as
+ # no longer being idle.
+ self._is_idle = False
+
+ try:
+ self._hook_test_case.run_dynamic_test(self._test_report)
+ except: # pylint: disable=bare-except
+ self.exc_info = sys.exc_info()
+ finally:
+ with self._lock:
+ self._is_idle = True
+ self._cond.notify_all()
+
+ def stop(self):
+ """Signal the background thread to exit, and wait until it does."""
+ with self._lock:
+ self._should_stop = True
+ self.resume(hook_test_case=None, test_report=None)
+ self.join()
+
+ def pause(self): # noqa: D205,D400
+ """Signal the background thread that it should stop executing 'self._hook_test_case', and
+ wait until the current execution has finished.
+ """
+ self._hook_test_case.signal_stop_test()
+ with self._lock:
+ while not self._is_idle:
+ self._cond.wait()
+
+ def resume(self, hook_test_case, test_report):
+ """Instruct the background thread to start executing 'hook_test_case'."""
+ self._hook_test_case = hook_test_case
+ self._test_report = test_report
+ self.exc_info = None
+ with self._lock:
+ self._should_resume = True
+ self._cond.notify_all()
+
+
+class _ContinuousDynamicJSTestCase(jsfile.DynamicJSTestCase):
+ """A dynamic TestCase that runs a JavaScript file repeatedly."""
+
+ def __init__(self, *args, **kwargs):
+ """Initialize _ContinuousDynamicJSTestCase."""
+ jsfile.DynamicJSTestCase.__init__(self, *args, **kwargs)
+ self._should_stop = threading.Event()
+
+ def run_test(self):
+ """Execute the test repeatedly."""
+ while not self._should_stop.is_set():
+ jsfile.DynamicJSTestCase.run_test(self)
+
+ def signal_stop_test(self): # noqa: D205,D400
+ """Indicate to the thread executing the run_test() method that the current execution is the
+ last one. This method returns without waiting for the current execution to finish.
+ """
+ self._should_stop.set()
diff --git a/buildscripts/resmokelib/testing/hooks/interface.py b/buildscripts/resmokelib/testing/hooks/interface.py
index 8e938355001..0c2f65077bc 100644
--- a/buildscripts/resmokelib/testing/hooks/interface.py
+++ b/buildscripts/resmokelib/testing/hooks/interface.py
@@ -78,7 +78,7 @@ class DynamicTestCase(testcase.TestCase): # pylint: disable=abstract-method
self.return_code = 1
self.logger.exception("{0} failed".format(self.description))
test_report.addFailure(self, sys.exc_info())
- raise errors.TestFailure(err)
+ raise errors.TestFailure(err.args[0])
except:
self.return_code = 2
test_report.addFailure(self, sys.exc_info())
diff --git a/buildscripts/resmokelib/testing/job.py b/buildscripts/resmokelib/testing/job.py
index 41e80bbee1b..f5586d8dfc6 100644
--- a/buildscripts/resmokelib/testing/job.py
+++ b/buildscripts/resmokelib/testing/job.py
@@ -95,7 +95,12 @@ class Job(object):
test(self.report)
try:
- if self.suite_options.fail_fast and not self.report.wasSuccessful():
+ # We are intentionally only checking the individual 'test' status and not calling
+ # report.wasSuccessful() here. It is possible that a thread running in the background as
+ # part of a hook has added a failed test case to 'self.report'. Checking the individual
+ # 'test' status ensures self._run_hooks_after_tests() is called if it is a hook's test
+ # case that has failed and not 'test' that has failed.
+ if self.suite_options.fail_fast and self.report.find_test_info(test).status != "pass":
self.logger.info("%s failed, so stopping..." % (test.short_description()))
raise errors.StopExecution("%s failed" % (test.short_description()))
diff --git a/buildscripts/resmokelib/testing/testcases/jstest.py b/buildscripts/resmokelib/testing/testcases/jstest.py
index ae12ac34ed6..0d150930a44 100644
--- a/buildscripts/resmokelib/testing/testcases/jstest.py
+++ b/buildscripts/resmokelib/testing/testcases/jstest.py
@@ -224,7 +224,7 @@ class JSTestCase(interface.ProcessTestCase):
self.logger.error(
"Encountered an error inside thread %d running jstest %s.", thread_id,
self.basename(), exc_info=thread.exc_info)
- raise thread.exc_info
+ raise thread.exc_info[1]
def run_test(self):
"""Execute the test."""