Diffstat (limited to 'pytests/powertest.py')
-rwxr-xr-x  pytests/powertest.py  419
1 file changed, 355 insertions(+), 64 deletions(-)
diff --git a/pytests/powertest.py b/pytests/powertest.py
index 49ce400d589..be18ebeabe2 100755
--- a/pytests/powertest.py
+++ b/pytests/powertest.py
@@ -2,11 +2,12 @@
"""Powercycle test
-Tests robustness of mongod to survice multiple powercycle events.
+Tests robustness of mongod to survive multiple powercycle events.
"""
from __future__ import print_function
+import atexit
import collections
import copy
import datetime
@@ -101,6 +102,36 @@ LOGGER = logging.getLogger(__name__)
This script will either download a MongoDB tarball or use an existing setup. """
+def exit_handler():
+    """Exit handler that deletes all named temporary files."""
+ LOGGER.debug("Exit handler invoked, cleaning up temporary files")
+ try:
+ NamedTempFile.delete_all()
+    except Exception:
+        # Cleanup is best-effort; never raise from an exit handler.
+        pass
+
+
+def kill_processes(pids, kill_children=True):
+    """Kills a list of processes and, optionally, their children."""
+ for pid in pids:
+ LOGGER.debug("Killing process with pid %d", pid)
+ try:
+ proc = psutil.Process(pid)
+ except psutil.NoSuchProcess:
+ LOGGER.error("Could not kill process with pid %d, as it no longer exists", pid)
+ continue
+ if kill_children:
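+            # 'children(recursive=True)' already collects the whole descendant tree, so the
+            # nested kill_processes() call below does not need to recurse again.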
+ child_procs = proc.children(recursive=True)
+ child_pids = []
+ for child in child_procs:
+ child_pids.append(child.pid)
+ kill_processes(child_pids, kill_children=False)
+ try:
+ proc.kill()
+ except psutil.NoSuchProcess:
+ LOGGER.error("Could not kill process with pid %d, as it no longer exists", pid)
+
+
def get_extension(filename):
"""Returns the extension of a file."""
return os.path.splitext(filename)[-1]
@@ -140,40 +171,73 @@ def executable_exists_in_path(executable):
return distutils.spawn.find_executable(executable) is not None
+def create_temp_executable_file(cmds):
+ """Creates an executable temporary file containing 'cmds'. Returns file name."""
+ temp_file_name = NamedTempFile.create(suffix=".sh")
+ with NamedTempFile.get(temp_file_name) as temp_file:
+ temp_file.write(cmds)
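+    # Make the temporary script executable by user, group and other.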
+ os_st = os.stat(temp_file_name)
+ os.chmod(temp_file_name, os_st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
+ return temp_file_name
+
+
+def start_cmd(cmd, use_file=False):
+    """Starts the command and returns the pid from Popen."""
+
+ orig_cmd = ""
+ # Multi-commands need to be written to a temporary file to execute on Windows.
+ # This is due to complications with invoking Bash in Windows.
+ if use_file:
+ orig_cmd = cmd
+ temp_file = create_temp_executable_file(cmd)
+ # The temporary file name will have '\' on Windows and needs to be converted to '/'.
+ cmd = "bash -c {}".format(temp_file.replace("\\", "/"))
+
+ # If 'cmd' is specified as a string, convert it to a list of strings.
+ if isinstance(cmd, str):
+ cmd = shlex.split(cmd)
+
+ if use_file:
+ LOGGER.debug("Executing '%s', tempfile contains: %s", cmd, orig_cmd)
+ else:
+ LOGGER.debug("Executing '%s'", cmd)
+
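+    # Start the process without waiting for it to finish; the caller tracks it by pid
+    # and stops it later (see kill_processes).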
+ proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+
+ return proc.pid
+
+
def execute_cmd(cmd, use_file=False):
- """Executes command and returns return_code and output from command"""
+    """Executes the command and returns a (return_code, output) tuple."""
orig_cmd = ""
# Multi-commands need to be written to a temporary file to execute on Windows.
# This is due to complications with invoking Bash in Windows.
if use_file:
orig_cmd = cmd
- with tempfile.NamedTemporaryFile(suffix=".sh", delete=False) as temp_file:
- temp_file.write(cmd)
- os_st = os.stat(temp_file.name)
- os.chmod(temp_file.name, os_st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
+ temp_file = create_temp_executable_file(cmd)
# The temporary file name will have '\' on Windows and needs to be converted to '/'.
- cmd = "bash -c {}".format(temp_file.name.replace("\\", "/"))
+ cmd = "bash -c {}".format(temp_file.replace("\\", "/"))
# If 'cmd' is specified as a string, convert it to a list of strings.
if isinstance(cmd, str):
cmd = shlex.split(cmd)
if use_file:
- LOGGER.info("Executing '%s', tempfile contains: %s", cmd, orig_cmd)
+ LOGGER.debug("Executing '%s', tempfile contains: %s", cmd, orig_cmd)
else:
- LOGGER.info("Executing '%s'", cmd)
+ LOGGER.debug("Executing '%s'", cmd)
try:
- proc = subprocess.Popen(
- cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+ proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
output, _ = proc.communicate()
error_code = proc.returncode
if error_code:
output = "Error executing cmd {}: {}".format(cmd, output)
finally:
if use_file:
- os.remove(temp_file.name)
+ os.remove(temp_file)
+
return error_code, output
@@ -202,23 +266,44 @@ def parse_options(options):
return options_map
-def download_file(url, file_name):
+def download_file(url, file_name, download_retries=5):
"""Returns True if download was successful. Raises error if download fails."""
LOGGER.info("Downloading %s to %s", url, file_name)
- with requests.Session() as session:
- adapter = requests.adapters.HTTPAdapter(max_retries=5)
- session.mount(url, adapter)
- response = session.get(url, stream=True)
- response.raise_for_status()
-
- with open(file_name, "wb") as file_handle:
- for block in response.iter_content(1024):
- file_handle.write(block)
+ while download_retries > 0:
- adapter.close()
+ with requests.Session() as session:
+ adapter = requests.adapters.HTTPAdapter(max_retries=download_retries)
+ session.mount(url, adapter)
+ response = session.get(url, stream=True)
+ response.raise_for_status()
- return True
+ with open(file_name, "wb") as file_handle:
+ try:
+ for block in response.iter_content(1024 * 1000):
+ file_handle.write(block)
+ except requests.exceptions.ChunkedEncodingError as err:
+ download_retries -= 1
+ if download_retries == 0:
+ raise Exception("Incomplete download for URL {}: {}".format(url, err))
+ continue
+
+ # Check if file download was completed.
+ if "Content-length" in response.headers:
+ url_content_length = int(response.headers["Content-length"])
+ file_size = os.path.getsize(file_name)
+                # Retry the download if the file has an unexpected size.
+ if url_content_length != file_size:
+ download_retries -= 1
+ if download_retries == 0:
+                        raise Exception("Downloaded file size ({} bytes) doesn't match content"
+                                        " length ({} bytes) for URL {}".format(
+                                            file_size, url_content_length, url))
+ continue
+
+ return True
+
+ raise Exception("Unknown download problem for {} to file {}".format(url, file_name))
def install_tarball(tarball, root_dir):
@@ -361,7 +446,7 @@ def install_mongod(bin_dir=None, tarball_url="latest", root_dir=None):
# Symlink the bin dir from the tarball to 'root_bin_dir'.
    # Since get_bin_dir returns an absolute path, we need to remove 'root_dir'.
tarball_bin_dir = get_bin_dir(root_dir).replace("{}/".format(root_dir), "")
- LOGGER.info("Symlink %s to %s", tarball_bin_dir, root_bin_dir)
+ LOGGER.debug("Symlink %s to %s", tarball_bin_dir, root_bin_dir)
symlink_dir(tarball_bin_dir, root_bin_dir)
@@ -380,6 +465,40 @@ def call_remote_operation(local_ops, remote_python, script_name, client_args, op
return ret, output
+class NamedTempFile(object):
+ """Class to control temporary files."""
+
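+    # Maps a temporary file name to its NamedTemporaryFile object so the file can be
+    # retrieved and deleted later.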
+ _FILE_MAP = {}
+
+ @classmethod
+ def create(cls, suffix=""):
+ """Creates a temporary file and returns the file name."""
+ temp_file = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
+ cls._FILE_MAP[temp_file.name] = temp_file
+ return temp_file.name
+
+ @classmethod
+ def get(cls, name):
+ """Gets temporary file object. Raises an exception if the file is unknown."""
+ if name not in cls._FILE_MAP:
+ raise Exception("Unknown temporary file {}.".format(name))
+ return cls._FILE_MAP[name]
+
+ @classmethod
+ def delete(cls, name):
+ """Deletes temporary file. Raises an exception if the file is unknown."""
+ if name not in cls._FILE_MAP:
+ raise Exception("Unknown temporary file {}.".format(name))
+ os.remove(name)
+ del cls._FILE_MAP[name]
+
+ @classmethod
+ def delete_all(cls):
+ """Deletes all temporary files."""
+ for name in list(cls._FILE_MAP):
+ cls.delete(name)
+
+
class ProcessControl(object):
""" Process control class.
@@ -430,14 +549,15 @@ class ProcessControl(object):
return True
return False
- def terminate(self):
- """ Terminates all running processes that match the list of pids. """
+ def kill(self):
+ """ Kills all running processes that match the list of pids. """
if self.is_running():
for proc in self.get_procs():
try:
- proc.terminate()
+ proc.kill()
except psutil.NoSuchProcess:
- LOGGER.info("Could not terminate pid %d, process no longer exists", proc.pid)
+ LOGGER.info("Could not kill process with pid %d, as it no longer exists",
+ proc.pid)
class WindowsService(object):
@@ -611,7 +731,7 @@ class PosixService(object):
def stop(self):
""" Stop process. Returns (code, output) tuple. """
proc = ProcessControl(name=self.bin_name)
- proc.terminate()
+ proc.kill()
self.pids = []
return 0, None
@@ -907,11 +1027,7 @@ def internal_crash(use_sudo=False):
# Windows does not have a way to immediately crash itself. It's
# better to use an external mechanism instead.
if _IS_WINDOWS:
- # Sleep after issuing shutdown, to prevent the 'client' side script
- # continuing, as shutdown is no immediate.
- cmds = """
- shutdown /r /f /t 0 ;
- sleep 10"""
+ cmds = "shutdown /r /f /t 0"
ret, output = execute_cmd(cmds, use_file=True)
return ret, output
else:
@@ -937,7 +1053,7 @@ def internal_crash(use_sudo=False):
return 1, "Crash did not occur"
-def crash_server(options, crash_canary, local_ops, script_name, client_args):
+def crash_server(options, crash_canary, canary_port, local_ops, script_name, client_args):
""" Crashes server and optionally writes canary doc before crash. """
crash_wait_time = options.crash_wait_time + random.randint(0, options.crash_wait_time_jitter)
@@ -962,7 +1078,8 @@ def crash_server(options, crash_canary, local_ops, script_name, client_args):
if options.canary == "remote":
# The crash canary function executes remotely, only if the
# crash_method is 'internal'.
- canary = "--docForCanary \"{}\"".format(crash_canary["args"][3])
+ canary = "--mongodPort {} --docForCanary \"{}\"".format(
+ canary_port, crash_canary["args"][3])
canary_cmd = "insert_canary"
else:
canary = ""
@@ -1011,7 +1128,7 @@ def get_mongo_client_args(options):
""" Returns keyword arg dict used in PyMongo client. """
mongo_args = {}
# Set the writeConcern
- mongo_args = options.write_concern
+ mongo_args = yaml.safe_load(options.write_concern)
# Set the readConcernLevel
if options.read_concern_level:
mongo_args["readConcernLevel"] = options.read_concern_level
@@ -1146,9 +1263,58 @@ def mongo_insert_canary(mongo, db_name, coll_name, doc):
return 0 if res.inserted_id else 1
+def new_resmoke_config(config_file, new_config_file, test_data):
+    """ Creates 'new_config_file' from 'config_file' with an update from 'test_data'. """
+ new_config = {
+ "executor": {
+ "config": {
+ "shell_options": {
+ "global_vars": {
+ "TestData": test_data
+ }
+ }
+ }
+ }
+ }
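+    # Overwrite the suite's 'executor' section so the shell receives 'test_data' via TestData.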
+ with open(config_file, "r") as yaml_stream:
+        config = yaml.safe_load(yaml_stream)
+ config.update(new_config)
+ with open(new_config_file, "w") as yaml_stream:
+ yaml.safe_dump(config, yaml_stream)
+
+
+def resmoke_client(work_dir,
+ mongo_path,
+ host_port,
+ js_test,
+ resmoke_suite,
+ no_wait=False,
+ log_file=None):
+    """Starts a resmoke client in 'work_dir', connects to 'host_port' and executes 'js_test'."""
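+    # When a log file is specified, duplicate the client output to it with 'tee -a'.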
+    log_output = "2>&1 | tee -a {}".format(log_file) if log_file else ""
+ cmds = ("cd {} ; "
+ "python buildscripts/resmoke.py "
+ "--mongo {} "
+ "--suites {} "
+ "--shellConnString mongodb://{} "
+ "--continueOnFailure "
+ "{} "
+ "{}".format(
+ work_dir, mongo_path, resmoke_suite, host_port, js_test, log_output))
+ ret, output, pid = None, None, None
+ if no_wait:
+ pid = start_cmd(cmds, use_file=True)
+ else:
+ ret, output = execute_cmd(cmds, use_file=True)
+ return ret, output, pid
+
+
def main():
""" Main program. """
+
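+    # Clean up any NamedTempFile scripts and configs when the script exits.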
+ atexit.register(exit_handler)
+
parser = optparse.OptionParser(usage="""
%prog [options]
@@ -1179,6 +1345,7 @@ Examples:
crash_options = optparse.OptionGroup(parser, "Crash Options")
mongodb_options = optparse.OptionGroup(parser, "MongoDB Options")
mongod_options = optparse.OptionGroup(parser, "mongod Options")
+ client_options = optparse.OptionGroup(parser, "Client Options")
program_options = optparse.OptionGroup(parser, "Program Options")
# Test options
@@ -1199,18 +1366,6 @@ Examples:
" which are added to '{}'".format(default_ssh_connection_options),
default=None)
- test_options.add_option("--mongoPath",
- dest="mongo_path",
- help="Path to mongo (shell) executable, if unspecifed, mongo client"
- " is launched from $PATH",
- default="mongo")
-
- test_options.add_option("--mongoRepoRootDir",
- dest="mongo_repo_root_dir",
- help="Root directory of mongoDB repository, defaults to current"
- " directory.",
- default=None)
-
test_options.add_option("--testLoops",
dest="num_loops",
help="Number of powercycle loops to run [default: %default]",
@@ -1400,6 +1555,60 @@ Examples:
" 'source venv/bin/activate; python'",
default="python")
+ # Client options
+ mongo_path = distutils.spawn.find_executable(
+ "mongo", os.getcwd() + os.pathsep + os.environ["PATH"])
+ if mongo_path:
+ mongo_path = os.path.abspath(mongo_path)
+ client_options.add_option("--mongoPath",
+ dest="mongo_path",
+                              help="Path to mongo (shell) executable, if unspecified, mongo client"
+ " is launched from the current directory.",
+ default=mongo_path)
+
+ client_options.add_option("--mongoRepoRootDir",
+ dest="mongo_repo_root_dir",
+                              help="Root directory of the MongoDB repository, defaults to current"
+ " directory.",
+ default=None)
+
+ client_options.add_option("--crudClient",
+ dest="crud_client",
+ help="The path to the CRUD client script on the local host"
+ " [default: '%default'].",
+ default="jstests/hooks/crud_client.js")
+
+ client_options.add_option("--configCrudClient",
+ dest="config_crud_client",
+ help="The path to the CRUD client configuration YML file on the"
+ " local host. This is the resmoke.py suite file. If"
+ " unspecified, a default configuration will be used that"
+ " provides a mongo (shell) DB connection to a running mongod.",
+ default=None)
+
+ client_options.add_option("--numCrudClients",
+ dest="num_crud_clients",
+ help="The number of concurrent CRUD clients to run"
+ " [default: '%default'].",
+ type="int",
+ default=1)
+
+ client_options.add_option("--numFsmClients",
+ dest="num_fsm_clients",
+ help="The number of concurrent FSM clients to run"
+ " [default: '%default'].",
+ type="int",
+ default=0)
+
+ client_options.add_option("--fsmWorkloadFiles",
+ dest="fsm_workload_files",
+ help="A list of the FSM workload files to execute. More than one"
+ " file can be specified either in a comma-delimited string,"
+ " or by specifying this option more than once. If unspecified,"
+ " then all FSM workload files are executed.",
+ action="append",
+ default=[])
+
# Program options
program_options.add_option("--remoteSudo",
dest="remote_sudo",
@@ -1458,6 +1667,7 @@ Examples:
parser.add_option_group(test_options)
parser.add_option_group(crash_options)
+ parser.add_option_group(client_options)
parser.add_option_group(mongodb_options)
parser.add_option_group(mongod_options)
parser.add_option_group(program_options)
@@ -1486,7 +1696,7 @@ Examples:
mongod_options_map = parse_options(options.mongod_options)
# Error out earlier if these options are not properly specified
- options.write_concern = yaml.safe_load(options.write_concern)
+ write_concern = yaml.safe_load(options.write_concern)
options.canary_doc = yaml.safe_load(options.canary_doc)
# Invoke remote_handler if remote_operation is specified.
@@ -1511,19 +1721,41 @@ Examples:
rsync_cmd = ""
rsync_opt = ""
- # Setup the mongo_repo_root
- mongo_repo_root_dir = "." if not options.mongo_repo_root_dir else options.mongo_repo_root_dir
+ # Setup the mongo client, mongo_path is required if there are local clients.
+ if (options.num_crud_clients > 0 or
+ options.num_fsm_clients > 0 or
+ options.validate_collections == "local"):
+ if options.mongo_path is None:
+ LOGGER.error("mongoPath must be specified")
+ sys.exit(1)
+ elif not os.path.isfile(options.mongo_path):
+ LOGGER.error("mongoPath %s does not exist", options.mongo_path)
+ sys.exit(1)
+
+ # Setup the CRUD & FSM clients.
+ with_external_server = "buildscripts/resmokeconfig/suites/with_external_server.yml"
+ config_crud_client = options.config_crud_client
+ fsm_client = "jstests/libs/fsm_serial_client.js"
+ fsm_workload_files = []
+ for fsm_workload_file in options.fsm_workload_files:
+ fsm_workload_files += fsm_workload_file.replace(" ", "").split(",")
+
+ # Setup the mongo_repo_root.
+ if options.mongo_repo_root_dir:
+ mongo_repo_root_dir = options.mongo_repo_root_dir
+ else:
+ mongo_repo_root_dir = os.getcwd()
if not os.path.isdir(mongo_repo_root_dir):
LOGGER.error("mongoRepoRoot %s does not exist", mongo_repo_root_dir)
sys.exit(1)
- # Setup the validate_collections option
+ # Setup the validate_collections option.
if options.validate_collections == "remote":
validate_collections_cmd = "validate_collections"
else:
validate_collections_cmd = ""
- # Setup the validate_canary option
+ # Setup the validate_canary option.
if options.canary and "nojournal" in mongod_options_map:
LOGGER.error("Cannot create and validate canary documents if the mongod option"
" '--nojournal' is used.")
@@ -1666,12 +1898,17 @@ Examples:
# Optionally, run local validation of collections.
if options.validate_collections == "local":
- cmds = """
- TestData = {};
- TestData.skipValidationOnNamespaceNotFound = true;
- load("jstests/hooks/run_validate_collections.js");"""
host_port = "{}:{}".format(mongod_host, secret_port)
- ret, output = mongo_shell(options.mongo_path, mongo_repo_root_dir, host_port, cmds)
+ new_config_file = NamedTempFile.create(suffix=".yml")
+ test_data = {"skipValidationOnNamespaceNotFound": True}
+ new_resmoke_config(with_external_server, new_config_file, test_data)
+ ret, output, _ = resmoke_client(
+ mongo_repo_root_dir,
+ options.mongo_path,
+ host_port,
+ "jstests/hooks/run_validate_collections.js",
+ new_config_file)
+ NamedTempFile.delete(new_config_file)
LOGGER.info("Collection validation: %d %s", ret, output)
if ret:
sys.exit(ret)
@@ -1721,7 +1958,57 @@ Examples:
if ret:
sys.exit(ret)
- # TODO SERVER-30802: Add CRUD & FSM clients
+ # Start CRUD clients
+ crud_pids = []
+ if options.num_crud_clients > 0:
+ host_port = "{}:{}".format(mongod_host, standard_port)
+ test_data = {"dbName": options.db_name}
+ if options.read_concern_level:
+ test_data["readConcern"] = {"level": options.read_concern_level}
+ if write_concern:
+ test_data["writeConcern"] = write_concern
+
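+            # Each CRUD client gets its own suite config and its own collection so the
+            # clients do not conflict with one another.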
+ for i in xrange(options.num_crud_clients):
+ crud_config_file = NamedTempFile.create(suffix=".yml")
+ test_data["collectionName"] = "{}-{}".format(options.collection_name, i)
+ new_resmoke_config(with_external_server, crud_config_file, test_data)
+ _, _, pid = resmoke_client(
+ work_dir=mongo_repo_root_dir,
+ mongo_path=options.mongo_path,
+ host_port=host_port,
+ js_test=options.crud_client,
+ resmoke_suite=crud_config_file,
+ no_wait=True,
+ log_file="crud_{}.log".format(i))
+ crud_pids.append(pid)
+
+ LOGGER.info(
+ "****Started %d CRUD client(s) %s****", options.num_crud_clients, crud_pids)
+
+ # Start FSM clients
+ fsm_pids = []
+ if options.num_fsm_clients > 0:
+            # 'host_port' is only set above when CRUD clients are started, so set it
+            # here as well before launching the FSM clients.
+            host_port = "{}:{}".format(mongod_host, standard_port)
+            test_data = {"fsmDbBlacklist": [options.db_name]}
+ if fsm_workload_files:
+ test_data["workloadFiles"] = fsm_workload_files
+
+ for i in xrange(options.num_fsm_clients):
+ fsm_config_file = NamedTempFile.create(suffix=".yml")
+ test_data["dbNamePrefix"] = "fsm-{}".format(i)
+ # Do collection validation only for the first FSM client.
+                test_data["validateCollections"] = (i == 0)
+ new_resmoke_config(with_external_server, fsm_config_file, test_data)
+ _, _, pid = resmoke_client(
+ work_dir=mongo_repo_root_dir,
+ mongo_path=options.mongo_path,
+ host_port=host_port,
+ js_test=fsm_client,
+ resmoke_suite=fsm_config_file,
+ no_wait=True,
+ log_file="fsm_{}.log".format(i))
+ fsm_pids.append(pid)
+
+ LOGGER.info("****Started %d FSM client(s) %s****", options.num_fsm_clients, fsm_pids)
# Crash the server. A pre-crash canary document is optionally written to the DB.
crash_canary = {}
@@ -1729,17 +2016,21 @@ Examples:
canary_doc = {"x": time.time()}
orig_canary_doc = copy.deepcopy(canary_doc)
mongo_opts = get_mongo_client_args(options)
- mongo = pymongo.MongoClient(
- host=mongod_host, port=standard_port, **mongo_opts)
+ mongo = pymongo.MongoClient(host=mongod_host, port=standard_port, **mongo_opts)
crash_canary["function"] = mongo_insert_canary
crash_canary["args"] = [
mongo,
options.db_name,
options.collection_name,
canary_doc]
- crash_server(options, crash_canary, local_ops, script_name, client_args)
+ crash_server(options, crash_canary, standard_port, local_ops, script_name, client_args)
+ # Wait a bit after sending command to crash the server to avoid connecting to the
+ # server before the actual crash occurs.
+ time.sleep(10)
canary_doc = copy.deepcopy(orig_canary_doc)
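+        # Stop the background CRUD and FSM clients now that the crash has been triggered.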
+ kill_processes(crud_pids + fsm_pids)
+
test_time = int(time.time()) - start_time
LOGGER.info("****Completed test loop %d test time %d seconds****", loop_num, test_time)
if loop_num == options.num_loops or test_time >= options.test_time: