diff options
-rwxr-xr-x | buildscripts/aws_ec2.py | 10 | ||||
-rwxr-xr-x | buildscripts/remote_operations.py | 6 | ||||
-rw-r--r-- | etc/evergreen.yml | 86 | ||||
-rwxr-xr-x | pytests/powertest.py | 378 |
4 files changed, 337 insertions, 143 deletions
diff --git a/buildscripts/aws_ec2.py b/buildscripts/aws_ec2.py index 6becdeaa6d6..58fc1892664 100755 --- a/buildscripts/aws_ec2.py +++ b/buildscripts/aws_ec2.py @@ -56,6 +56,7 @@ class AwsEc2(object): print(".", end="", file=sys.stdout) sys.stdout.flush() try: + client_error = "" time_left = end_time - time.time() instance.load() if instance.state["Name"] == state: @@ -63,18 +64,19 @@ class AwsEc2(object): break if time_left <= 0: break - except botocore.exceptions.ClientError: + except botocore.exceptions.ClientError as err: # A ClientError exception can sometimes be generated, due to RequestLimitExceeded, # so we ignore it and retry until we time out. - pass + client_error = " {}".format(err.message) + wait_interval_secs = 15 if time_left > 15 else time_left time.sleep(wait_interval_secs) if show_progress: if reached_state: print(" Instance {}!".format(instance.state["Name"]), file=sys.stdout) else: - print(" Instance in state '{}', failed to reach state '{}'!".format( - instance.state["Name"], state), file=sys.stdout) + print(" Instance in state '{}', failed to reach state '{}'{}!".format( + instance.state["Name"], state, client_error), file=sys.stdout) sys.stdout.flush() return 0 if reached_state else 1 diff --git a/buildscripts/remote_operations.py b/buildscripts/remote_operations.py index e2436ece3d9..1595cc0e03b 100755 --- a/buildscripts/remote_operations.py +++ b/buildscripts/remote_operations.py @@ -35,7 +35,11 @@ _IS_WINDOWS = sys.platform == "win32" or sys.platform == "cygwin" _OPERATIONS = ["shell", "copy_to", "copy_from"] -_SSH_CONNECTION_ERRORS = ["System is booting up.", "Permission denied"] +_SSH_CONNECTION_ERRORS = [ + "System is booting up.", + "Permission denied", + "ssh_exchange_identification: read: Connection reset by peer" +] def posix_path(path): diff --git a/etc/evergreen.yml b/etc/evergreen.yml index 234d656e541..2ca724dfbc3 100644 --- a/etc/evergreen.yml +++ b/etc/evergreen.yml @@ -178,14 +178,14 @@ variables: - &powercycle_test ec2_artifacts: ${monitor_proc_file} ${monitor_system_file} ${log_path} ${backup_path_after} ${backup_path_before} - program_options: --logLevel info --backupPathBefore ${backup_path_before} --backupPathAfter ${backup_path_after} - connection_options: --sshUserHost ${ip_address} --sshConnection \"${ssh_identity} ${ssh_connection_options}\" - test_options: --testLoops 15 --seedDocNum 10000 --rsync --validate local --canary local - crash_options: "--crashMethod aws_ec2 --crashOptions ${instance_id}:private_ip_address --crashWaitTime 30 --jitterForCrashWaitTime 5" - client_options: --numCrudClients 20 --numFsmClients 20 - mongodb_options: --rootDir ${remote_dir}-${task_id} --mongodbBinDir ${remote_dir} - mongod_options: --mongodUsablePorts ${standard_port} ${secret_port} --dbPath ${db_path} --logPath ${log_path} - mongod_extra_options: --mongodOptions \"--setParameter enableTestCommands=1\" + program_options: --logLevel=info --backupPathBefore=${backup_path_before} --backupPathAfter=${backup_path_after} + connection_options: --sshUserHost=${ip_address} --sshConnection=\"${ssh_identity} ${ssh_connection_options}\" + test_options: --testLoops=15 --seedDocNum=10000 --rsync --validate=local --canary=local + crash_options: "--crashMethod=aws_ec2 --crashOptions=${instance_id}:private_ip_address --crashWaitTime=45 --jitterForCrashWaitTime=5" + client_options: --numCrudClients=20 --numFsmClients=20 + mongodb_options: --rootDir=${remote_dir}-${task_id} --mongodbBinDir=${remote_dir} + mongod_options: --mongodUsablePorts ${standard_port} ${secret_port} --dbPath=${db_path} --logPath=${log_path} + mongod_extra_options: --mongodOptions=\"--setParameter enableTestCommands=1\" ####################################### # Functions # @@ -853,6 +853,7 @@ functions: type: test params: working_dir: src + shell: bash script: | set -o verbose set -o errexit @@ -869,35 +870,47 @@ functions: cmds="source ${virtualenv_dir|venv}/$bin_dir/activate" cmds="$cmds; python -u" # The remote python operates in a virtualenv - remote_python="--remotePython \"$cmds\"" + remote_python="--remotePython=\"$cmds\"" + # Initialize report.json. The report will be finalized by powertest.py. + report_json="report.json" start_time=$(date +%s) - status="\"pass\"" + status="\"fail\"" failures=0 + exit_code=1 + end_time=$start_time + elapsed_secs=0 + echo "{\"failures\": $failures, \"results\": [{\"status\": $status, \"exit_code\": $exit_code, \"test_file\": \"${task_name}\", \"start\": $start_time, \"end\": $end_time, \"elapsed\": $elapsed_secs}]}" > $report_json + generate_report_json="--reportJsonFile=$report_json" - set +o errexit - eval python -u pytests/powertest.py \ - "${connection_options} \ + # # On Windows execute 10 test loops, cap the maximum number of clients to 10 each. + if [ "Windows_NT" = "$OS" ]; then + test_override="--testLoops=10" + max_clients=10 + for client in --numCrudClients --numFsmClients + do + override=$(echo ${client_options} | awk "BEGIN {FS=\" |=\"} {for(j=1;j<=NF;j++) if (\$j~/^$client/) {min=(\$(j+1) < $max_clients) ? \$(j+1) : $max_clients; printf \"%s=%d\", \$j,min}}") + client_override="$client_override $override" + done + fi + + config_file=powertest.yml + eval python pytests/powertest.py \ + "--saveConfigOptions=$config_file \ + ${connection_options} \ ${program_options} \ + $generate_report_json \ $remote_sudo \ $remote_python \ ${test_options} \ + $test_override \ ${crash_options} \ ${client_options} \ + $client_override \ ${mongodb_options} \ ${mongod_options} \ ${mongod_extra_options}" - exit_code=$? - - # Create report.json - end_time=$(date +%s) - elapsed_secs=$((end_time-start_time)) - if [ $exit_code -ne 0 ]; then - status="\"fail\"" - failures=1 - fi - echo "{\"failures\": $failures, \"results\": [{\"status\": $status, \"exit_code\": $exit_code, \"test_file\": \"${task_name}\", \"start\": $start_time, \"end\": $end_time, \"elapsed\": $elapsed_secs}]}" > report.json - exit $exit_code + python -u pytests/powertest.py --configFile=$config_file "do multiversion setup" : command: shell.exec @@ -4217,8 +4230,8 @@ tasks: # mongod will not start if it crashed mongod while creating a namespace (SERVER-26499). vars: <<: *powercycle_test - client_options: --numCrudClients 20 --numFsmClients 0 - mongod_extra_options: --mongodOptions \"--setParameter enableTestCommands=1 --storageEngine mmapv1\" + client_options: --numCrudClients=20 --numFsmClients=0 + mongod_extra_options: --mongodOptions=\"--setParameter enableTestCommands=1 --storageEngine mmapv1\" - name: powercycle_WT depends_on: @@ -4237,7 +4250,7 @@ tasks: - func: "run powercycle test" vars: <<: *powercycle_test - mongod_extra_options: --mongodOptions \"--setParameter enableTestCommands=1 --storageEngine wiredTiger\" + mongod_extra_options: --mongodOptions=\"--setParameter enableTestCommands=1 --storageEngine wiredTiger\" - name: powercycle_fcv3.4_WT depends_on: @@ -4256,9 +4269,9 @@ tasks: - func: "run powercycle test" vars: <<: *powercycle_test - client_options: --numCrudClients 20 --numFsmClients 20 --fsmWorkloadBlacklistFiles toggle_feature_compatibility.js - mongod_options: --mongodUsablePorts ${standard_port} ${secret_port} --dbPath ${db_path} --logPath ${log_path} --fcv 3.4 - mongod_extra_options: --mongodOptions \"--setParameter enableTestCommands=1 --storageEngine wiredTiger\" + client_options: --numCrudClients=20 --numFsmClients=20 --fsmWorkloadBlacklistFiles=toggle_feature_compatibility.js + mongod_options: --mongodUsablePorts ${standard_port} ${secret_port} --dbPath=${db_path} --logPath=${log_path} --fcv=3.4 + mongod_extra_options: --mongodOptions=\"--setParameter enableTestCommands=1 --storageEngine wiredTiger\" - name: powercycle_replication_WT depends_on: @@ -4277,7 +4290,7 @@ tasks: - func: "run powercycle test" vars: <<: *powercycle_test - mongod_extra_options: --replSet powercyle --mongodOptions \"--setParameter enableTestCommands=1 --storageEngine wiredTiger\" + mongod_extra_options: --replSet=powercyle --mongodOptions=\"--setParameter enableTestCommands=1 --storageEngine wiredTiger\" - name: powercycle_replication_smalloplog_WT depends_on: @@ -4296,7 +4309,7 @@ tasks: - func: "run powercycle test" vars: <<: *powercycle_test - mongod_extra_options: --replSet powercyle --mongodOptions \"--setParameter enableTestCommands=1 --oplogSize 1 --storageEngine wiredTiger\" + mongod_extra_options: --replSet=powercyle --mongodOptions=\"--setParameter enableTestCommands=1 --oplogSize 20 --storageEngine wiredTiger\" - name: powercycle_syncdelay_WT depends_on: @@ -4315,7 +4328,7 @@ tasks: - func: "run powercycle test" vars: <<: *powercycle_test - mongod_extra_options: --mongodOptions \"--setParameter enableTestCommands=1 --syncdelay 10 --storageEngine wiredTiger\" + mongod_extra_options: --mongodOptions=\"--setParameter enableTestCommands=1 --syncdelay 10 --storageEngine wiredTiger\" - name: powercycle_write_concern_majority_WT depends_on: @@ -4334,7 +4347,7 @@ tasks: - func: "run powercycle test" vars: <<: *powercycle_test - client_options: "--numCrudClients 20 --numFsmClients 20 --writeConcern '{\"w\": \"majority\"}'" + client_options: "--numCrudClients=20 --numFsmClients=20 --writeConcern='{\"w\": \"majority\"}'" mongod_extra_options: --mongodOptions \"--setParameter enableTestCommands=1 --storageEngine wiredTiger\" - name: idl_tests @@ -6459,6 +6472,11 @@ buildvariants: - name: ssl - name: sslSpecial - name: tool_WT + - name: powercycle + - name: powercycle_WT + - name: powercycle_fcv3.4_WT + - name: powercycle_replication_WT + - name: powercycle_syncdelay_WT - name: push distros: - rhel70-small diff --git a/pytests/powertest.py b/pytests/powertest.py index eec89171911..2fe5a5d8fa5 100755 --- a/pytests/powertest.py +++ b/pytests/powertest.py @@ -12,6 +12,7 @@ import collections import copy import datetime import distutils.spawn +import json import importlib import logging import optparse @@ -91,6 +92,10 @@ __version__ = "0.1" LOGGER = logging.getLogger(__name__) +_report_json_success = False +_report_json = {} +_report_json_file = "" + """ Client & server side powercycle test script. This script can be run against any host which is reachable via ssh. @@ -105,8 +110,43 @@ LOGGER = logging.getLogger(__name__) def exit_handler(): - """Exit handler, deletes all named temporary files.""" - LOGGER.debug("Exit handler invoked, cleaning up temporary files") + """Exit handler actions: + - Generate report.json + - Kill spawned processes + - Delete all named temporary files + """ + if _report_json: + LOGGER.debug("Exit handler: Updating report file %s", _report_json_file) + try: + test_start = _report_json["results"][0]["start"] + test_end = int(time.time()) + test_time = test_end - test_start + if _report_json_success: + failures = 0 + status = "pass" + exit_code = 0 + else: + failures = 1 + status = "fail" + exit_code = 1 + _report_json["failures"] = failures + _report_json["results"][0]["status"] = status + _report_json["results"][0]["exit_code"] = exit_code + _report_json["results"][0]["end"] = test_end + _report_json["results"][0]["elapsed"] = test_time + with open(_report_json_file, "w") as jstream: + json.dump(_report_json, jstream) + LOGGER.debug("Exit handler: report file contents %s", _report_json) + except: + pass + + LOGGER.debug("Exit handler: Killing processes") + try: + Processes.kill_all() + except: + pass + + LOGGER.debug("Exit handler: Cleaning up temporary files") try: NamedTempFile.delete_all() except: @@ -115,7 +155,7 @@ def exit_handler(): def child_processes(parent_pid): """Returns a list of all child processes for a pid.""" - # The child processes cannot be obtained from the parent on Windows. See + # The child processes cannot be obtained from the parent on Windows from psutil. See # https://stackoverflow.com/questions/30220732/python-psutil-not-showing-all-child-processes child_procs = [] while psutil.pid_exists(parent_pid): @@ -136,20 +176,19 @@ def kill_process(pid, kill_children=True): try: parent = psutil.Process(pid) except psutil.NoSuchProcess: - LOGGER.error("Could not kill process %d, as it no longer exists", pid) + LOGGER.warn("Could not kill process %d, as it no longer exists", pid) return 0 - procs = [] + procs = [parent] if kill_children: procs += child_processes(pid) - procs.append(parent) for proc in procs: try: - LOGGER.debug("Killing process %d", proc.pid) + LOGGER.debug("Killing process '%s' pid %d", proc.name(), proc.pid) proc.kill() except psutil.NoSuchProcess: - LOGGER.error("Could not kill process %d, as it no longer exists", pid) + LOGGER.warn("Could not kill process %d, as it no longer exists", pid) _, alive = psutil.wait_procs(procs, timeout=30, callback=None) if alive: @@ -161,10 +200,10 @@ def kill_process(pid, kill_children=True): def kill_processes(procs, kill_children=True): """Kill a list of processes and optionally it's children.""" for proc in procs: - LOGGER.debug("Killing parent process %d", proc.pid) + LOGGER.debug("Starting kill of parent process %d", proc.pid) kill_process(proc.pid, kill_children=kill_children) ret = proc.wait() - LOGGER.debug("Kill of parent process %d has return code of %d", proc.pid, ret) + LOGGER.debug("Finished kill of parent process %d has return code of %d", proc.pid, ret) def get_extension(filename): @@ -203,7 +242,7 @@ def get_bin_dir(root_dir): def create_temp_executable_file(cmds): """Creates an executable temporary file containing 'cmds'. Returns file name.""" - temp_file_name = NamedTempFile.create(suffix=".sh", dir="tmp") + temp_file_name = NamedTempFile.create(suffix=".sh", directory="tmp") with NamedTempFile.get(temp_file_name) as temp_file: temp_file.write(cmds) os_st = os.stat(temp_file_name) @@ -233,6 +272,7 @@ def start_cmd(cmd, use_file=False): LOGGER.debug("Executing '%s'", cmd) proc = subprocess.Popen(cmd, close_fds=True) + LOGGER.debug("Spawned process %s pid %d", psutil.Process(proc.pid).name(), proc.pid) return proc @@ -509,6 +549,31 @@ def call_remote_operation(local_ops, remote_python, script_name, client_args, op return ret, output +class Processes(object): + """Class to create and kill spawned processes.""" + + _PROC_LIST = [] + + @classmethod + def create(cls, cmds): + """Creates a spawned process.""" + proc = start_cmd(cmds, use_file=True) + cls._PROC_LIST.append(proc) + + @classmethod + def kill(cls, proc): + """Kills a spawned process and all it's children.""" + kill_processes([proc], kill_children=True) + cls._PROC_LIST.remove(proc) + + @classmethod + def kill_all(cls): + """Kill all spawned processes.""" + procs = copy.copy(cls._PROC_LIST) + for proc in procs: + cls.kill(proc) + + class NamedTempFile(object): """Class to control temporary files.""" @@ -516,12 +581,13 @@ class NamedTempFile(object): _DIR_LIST = [] @classmethod - def create(cls, dir=None, suffix=""): + def create(cls, directory=None, suffix=""): """Creates a temporary file, and optional directory, and returns the file name.""" - if dir and not os.path.isdir(dir): - os.makedirs(dir) - cls._DIR_LIST.append(dir) - temp_file = tempfile.NamedTemporaryFile(suffix=suffix, dir=dir, delete=False) + if directory and not os.path.isdir(directory): + LOGGER.debug("Creating temporary directory %s", directory) + os.makedirs(directory) + cls._DIR_LIST.append(directory) + temp_file = tempfile.NamedTemporaryFile(suffix=suffix, dir=directory, delete=False) cls._FILE_MAP[temp_file.name] = temp_file return temp_file.name @@ -537,32 +603,41 @@ class NamedTempFile(object): """Deletes temporary file. Raises an exception if the file is unknown.""" if name not in cls._FILE_MAP: raise Exception("Unknown temporary file {}.".format(name)) + if not os.path.exists(name): + LOGGER.debug("Temporary file %s no longer exists", name) + del cls._FILE_MAP[name] + return try: os.remove(name) except (IOError, OSError) as err: - LOGGER.warn("Unable to delete temporary file {} with error {}".format(name, err)) + LOGGER.warn("Unable to delete temporary file %s with error %s", name, err) if not os.path.exists(name): del cls._FILE_MAP[name] @classmethod - def delete_dir(cls, dir): + def delete_dir(cls, directory): """Deletes temporary directory. Raises an exception if the directory is unknown.""" - if dir not in cls._DIR_LIST: - raise Exception("Unknown temporary directory {}.".format(dir)) + if directory not in cls._DIR_LIST: + raise Exception("Unknown temporary directory {}.".format(directory)) + if not os.path.exists(directory): + LOGGER.debug("Temporary directory %s no longer exists", directory) + cls._DIR_LIST.remove(directory) + return try: - shutil.rmtree(dir) + shutil.rmtree(directory) except (IOError, OSError) as err: - LOGGER.warn("Unable to delete temporary directory {} with error {}".format(dir, err)) - if not os.path.exists(dir): - cls._DIR_LIST.remove(dir) + LOGGER.warn( + "Unable to delete temporary directory %s with error %s", directory, err) + if not os.path.exists(directory): + cls._DIR_LIST.remove(directory) @classmethod def delete_all(cls): """Deletes all temporary files and directories.""" for name in list(cls._FILE_MAP): cls.delete(name) - for dir in cls._DIR_LIST: - cls.delete_dir(dir) + for directory in cls._DIR_LIST: + cls.delete_dir(directory) class ProcessControl(object): @@ -750,8 +825,7 @@ class WindowsService(object): win32serviceutil.QueryServiceStatus(serviceName=self.name)) if svc_state in self._states: return self._states[svc_state] - else: - return "unknown" + return "unknown" except pywintypes.error: return "not installed" @@ -982,12 +1056,20 @@ def remote_handler(options, operations): port=options.port, options=options.mongod_options) + mongo_client_opts = get_mongo_client_args(options, host="localhost", port=options.port) + # Perform the sequence of operations specified. If any operation fails # then return immediately. for operation in operations: # This is the internal "crash" mechanism, which is executed on the remote host. if operation == "crash_server": ret, output = internal_crash(options.remote_sudo) + # An internal crash on Windows is not immediate + try: + LOGGER.info("Waiting after issuing internal crash!") + time.sleep(30) + except IOError: + pass elif operation == "install_mongod": ret, output = mongod.install(root_dir, options.tarball_url) @@ -1023,7 +1105,7 @@ def remote_handler(options, operations): return ret LOGGER.info("Started mongod running on port %d pid %s", options.port, mongod.get_pids()) - mongo = pymongo.MongoClient(host="localhost", port=options.port) + mongo = pymongo.MongoClient(**mongo_client_opts) LOGGER.info("Server buildinfo: %s", mongo.admin.command("buildinfo")) LOGGER.info("Server serverStatus: %s", mongo.admin.command("serverStatus")) if options.use_replica_set and options.repl_set: @@ -1036,7 +1118,7 @@ def remote_handler(options, operations): ret = wait_for_mongod_shutdown(options.db_path) elif operation == "shutdown_mongod": - mongo = pymongo.MongoClient(host="localhost", port=options.port) + mongo = pymongo.MongoClient(**mongo_client_opts) try: mongo.admin.command("shutdown", force=True) except pymongo.errors.AutoReconnect: @@ -1048,26 +1130,26 @@ def remote_handler(options, operations): LOGGER.info(output) elif operation == "seed_docs": - mongo = pymongo.MongoClient(host="localhost", port=options.port) + mongo = pymongo.MongoClient(**mongo_client_opts) ret = mongo_seed_docs( mongo, options.db_name, options.collection_name, options.seed_doc_num) elif operation == "validate_collections": - mongo = pymongo.MongoClient(host="localhost", port=options.port) + mongo = pymongo.MongoClient(**mongo_client_opts) ret = mongo_validate_collections(mongo) elif operation == "insert_canary": - mongo = pymongo.MongoClient(host="localhost", port=options.port) + mongo = pymongo.MongoClient(**mongo_client_opts) ret = mongo_insert_canary( mongo, options.db_name, options.collection_name, options.canary_doc) elif operation == "validate_canary": - mongo = pymongo.MongoClient(host="localhost", port=options.port) + mongo = pymongo.MongoClient(**mongo_client_opts) ret = mongo_validate_canary( mongo, options.db_name, options.collection_name, options.canary_doc) elif operation == "set_fcv": - mongo = pymongo.MongoClient(host="localhost", port=options.port) + mongo = pymongo.MongoClient(**mongo_client_opts) try: ret = mongo.admin.command("setFeatureCompatibilityVersion", options.fcv_version) ret = 0 if ret["ok"] == 1 else 1 @@ -1170,11 +1252,11 @@ def crash_server(options, crash_canary, canary_port, local_ops, script_name, cli ec2 = aws_ec2.AwsEc2() crash_func = ec2.control_instance instance_id, _ = get_aws_crash_options(options.crash_options) - crash_args = ["force-stop", instance_id, 240, True] + crash_args = ["force-stop", instance_id, 600, True] else: message = "Unsupported crash method '{}' provided".format(options.crash_method) - LOGGER.error("Unsupported crash method '%s' provided", message) + LOGGER.error(message) return 1, message # Invoke the crash canary function, right before crashing the server. @@ -1202,14 +1284,21 @@ def wait_for_mongod_shutdown(data_dir, timeout=120): return 0 -def get_mongo_client_args(options): +def get_mongo_client_args(options, host=None, port=None): """ Returns keyword arg dict used in PyMongo client. """ - mongo_args = {} + # Set the serverSelectionTimeoutMS to 600 seconds + mongo_args = {"serverSelectionTimeoutMS": 600000} + # Set the serverSelectionTimeoutMS to 120 seconds + mongo_args["socketTimeoutMS"] = 120000 # Set the writeConcern mongo_args = yaml.safe_load(options.write_concern) # Set the readConcernLevel if options.read_concern_level: mongo_args["readConcernLevel"] = options.read_concern_level + if host: + mongo_args["host"] = host + if port: + mongo_args["port"] = port return mongo_args @@ -1218,10 +1307,10 @@ def mongo_shell(mongo_path, work_dir, host_port, mongo_cmds, retries=5, retry_sl cmds = (""" cd {}; echo {} | {} {}""".format( - pipes.quote(work_dir), - pipes.quote(mongo_cmds), - pipes.quote(mongo_path), - host_port)) + pipes.quote(work_dir), + pipes.quote(mongo_cmds), + pipes.quote(mongo_path), + host_port)) attempt_num = 0 while True: ret, output = execute_cmd(cmds, use_file=True) @@ -1346,7 +1435,7 @@ def mongo_validate_canary(mongo, db_name, coll_name, doc): def mongo_insert_canary(mongo, db_name, coll_name, doc): """ Inserts a canary document with 'j' True. Returns 0 if successful. """ - LOGGER.info("Inserting canary document %s", doc) + LOGGER.info("Inserting canary document %s to DB %s Collection %s", doc, db_name, coll_name) coll = mongo[db_name][coll_name].with_options( write_concern=pymongo.write_concern.WriteConcern(j=True)) res = coll.insert_one(doc) @@ -1400,17 +1489,21 @@ def resmoke_client(work_dir, repeat_num, pipes.quote(js_test), log_output)) - ret, output, proc = None, None, None + ret, output = None, None if no_wait: - proc = start_cmd(cmds, use_file=True) + Processes.create(cmds) else: ret, output = execute_cmd(cmds, use_file=True) - return ret, output, proc + return ret, output def main(): """ Main program. """ + global _report_json_success + global _report_json + global _report_json_file + atexit.register(exit_handler) parser = optparse.OptionParser(usage=""" @@ -1482,6 +1575,18 @@ Examples: action="store_true", default=False) + test_options.add_option("--backupPathBefore", + dest="backup_path_before", + help="Path where the db_path is backed up before crash recovery," + " defaults to '<rootDir>/data-beforerecovery'", + default=None) + + test_options.add_option("--backupPathAfter", + dest="backup_path_after", + help="Path where the db_path is backed up after crash recovery," + " defaults to '<rootDir>/data-afterrecovery'", + default=None) + validate_locations = ["local", "remote"] test_options.add_option("--validate", dest="validate_collections", @@ -1651,17 +1756,6 @@ Examples: help="Set the FeatureCompatibilityVersion of mongod.", default=None) - # Program options - program_options.add_option("--remotePython", - dest="remote_python", - help="The python intepreter to use on the remote host" - " [default: '%default']." - " To be able to use a python virtual environment," - " which has already been provisioned on the remote" - " host, specify something similar to this:" - " 'source venv/bin/activate; python'", - default="python") - # Client options mongo_path = distutils.spawn.find_executable( "mongo", os.getcwd() + os.pathsep + os.environ["PATH"]) @@ -1725,6 +1819,37 @@ Examples: default=[]) # Program options + program_options.add_option("--configFile", + dest="config_file", + help="YAML configuration file of program options." + " Option values are mapped to command line option names." + " The command line option overrides any specified options" + " from this file.", + default=None) + + program_options.add_option("--saveConfigOptions", + dest="save_config_options", + help="Save the program options to a YAML configuration file." + " If this options is specified the program only saves" + " the configuration file and exits.", + default=None) + + program_options.add_option("--reportJsonFile", + dest="report_json_file", + help="Create or update the specified report file upon program" + " exit.", + default=None) + + program_options.add_option("--remotePython", + dest="remote_python", + help="The python intepreter to use on the remote host" + " [default: '%default']." + " To be able to use a python virtual environment," + " which has already been provisioned on the remote" + " host, specify something similar to this:" + " 'source venv/bin/activate; python'", + default="python") + program_options.add_option("--remoteSudo", dest="remote_sudo", help="Use sudo on the remote host for priveleged operations." @@ -1763,18 +1888,6 @@ Examples: action="store_true", default=False) - program_options.add_option("--backupPathBefore", - dest="backup_path_before", - help="Path where the db_path is backed up before crash recovery," - " defaults to '<rootDir>/data-beforerecovery/db'", - default=None) - - program_options.add_option("--backupPathAfter", - dest="backup_path_after", - help="Path where the db_path is backed up after crash recovery," - " defaults to '<rootDir>/data-afterrecovery/db'", - default=None) - program_options.add_option("--rsyncDest", dest="rsync_dest", help=optparse.SUPPRESS_HELP, @@ -1797,12 +1910,60 @@ Examples: LOGGER.info("powertest.py invocation: %s", " ".join(sys.argv)) + # Command line options override the config file options. + config_options = None + if options.config_file: + with open(options.config_file) as ystream: + config_options = yaml.safe_load(ystream) + LOGGER.info("Loading config file %s with options %s", options.config_file, config_options) + # Load the options specified in the config_file + parser.set_defaults(**config_options) + options, args = parser.parse_args() + # Disable this option such that the remote side does not load a config_file. + options.config_file = None + config_options["config_file"] = None + + if options.save_config_options: + # Disable this option such that the remote side does not save the config options. + save_config_options = options.save_config_options + options.save_config_options = None + save_options = {} + for opt_group in parser.option_groups: + for opt in opt_group.option_list: + if getattr(options, opt.dest) != opt.default: + save_options[opt.dest] = getattr(options, opt.dest) + LOGGER.info("Config options being saved %s", save_options) + with open(save_config_options, "w") as ystream: + yaml.safe_dump(save_options, ystream, default_flow_style=False) + sys.exit(0) + script_name = os.path.basename(__file__) # Print script name and version. if options.version: print("{}:{}".format(script_name, __version__)) sys.exit(0) + if options.report_json_file: + _report_json_file = options.report_json_file + if _report_json_file and os.path.exists(_report_json_file): + with open(_report_json_file) as jstream: + _report_json = json.load(jstream) + else: + _report_json = { + "failures": 0, + "results": [ + {"status": "fail", + "test_file": __name__, + "exit_code": 0, + "elapsed": 0, + "start": int(time.time()), + "end": int(time.time())} + ] + } + LOGGER.debug("Updating/creating report JSON %s", _report_json) + # Disable this option such that the remote side does not generate report.json + options.report_json_file = None + # Setup the crash options if ((options.crash_method == "aws_ec2" or options.crash_method == "mpower") and options.crash_options is None): @@ -1851,6 +2012,12 @@ Examples: if options.rsync_data: rsync_cmd = "rsync_data" + backup_path_before = options.backup_path_before + if not backup_path_before: + backup_path_before = "{}/data-beforerecovery".format(options.root_dir) + backup_path_after = options.backup_path_after + if not backup_path_after: + backup_path_after = "{}/data-afterrecovery".format(options.root_dir) else: rsync_cmd = "" rsync_opt = "" @@ -1965,8 +2132,8 @@ Examples: # remote host's invocation of this script. elif isinstance(option_value, str) and re.search("\"|'| ", option_value): option_value = "'{}'".format(option_value) - # The tuple options need to be changed to a string. - elif isinstance(option_value, tuple): + # The tuple, list or set options need to be changed to a string. + elif isinstance(option_value, (tuple, list, set)): option_value = " ".join(map(str, option_value)) client_args = "{} {} {}".format(client_args, option.get_opt_string(), option_value) @@ -2013,20 +2180,20 @@ Examples: temp_client_files = [] + validate_canary_local = False if options.canary and loop_num > 1: - canary_opt = "--docForCanary \"{}\"".format(canary_doc) - validate_canary_cmd = "validate_canary" if options.canary else "" + if options.canary == "remote": + canary_opt = "--docForCanary \"{}\"".format(canary_doc) + validate_canary_cmd = "validate_canary" if options.canary else "" + else: + validate_canary_local = True else: canary_opt = "" # Since rsync requires Posix style paths, we do not use os.path.join to # construct the rsync destination directory. if rsync_cmd: - if options.backup_path_before: - rsync_dest = options.backup_path_before - else: - rsync_dest = "{}/data-afterrecovery".format(options.root_dir) - rsync_opt = " --rsyncDest {}".format(rsync_dest) + rsync_opt = "--rsyncDest {}".format(backup_path_before) # Optionally, rsync the pre-recovery database. # Start monogd on the secret port. @@ -2060,20 +2227,30 @@ Examples: if ret: sys.exit(ret) + # Optionally validate canary document locally. + if validate_canary_local: + mongo = pymongo.MongoClient( + **get_mongo_client_args(options, host=mongod_host, port=secret_port)) + ret = mongo_validate_canary( + mongo, options.db_name, options.collection_name, canary_doc) + LOGGER.info("Local canary validation: %d", ret) + if ret: + sys.exit(ret) + # Optionally, run local validation of collections. if options.validate_collections == "local": host_port = "{}:{}".format(mongod_host, secret_port) - new_config_file = NamedTempFile.create(suffix=".yml", dir="tmp") + new_config_file = NamedTempFile.create(suffix=".yml", directory="tmp") temp_client_files.append(new_config_file) validation_test_data = {"skipValidationOnNamespaceNotFound": True} new_resmoke_config(with_external_server, new_config_file, validation_test_data) - ret, output, _ = resmoke_client( + ret, output = resmoke_client( mongo_repo_root_dir, mongo_path, host_port, "jstests/hooks/run_validate_collections.js", new_config_file) - LOGGER.info("Collection validation: %d %s", ret, output) + LOGGER.info("Local collection validation: %d %s", ret, output) if ret: sys.exit(ret) @@ -2094,11 +2271,7 @@ Examples: # Since rsync requires Posix style paths, we do not use os.path.join to # construct the rsync destination directory. if rsync_cmd: - if options.backup_path_after: - rsync_dest = options.backup_path_after - else: - rsync_dest = "{}/data-afterrecovery".format(options.root_dir) - rsync_opt = " --rsyncDest {}".format(rsync_dest) + rsync_opt = "--rsyncDest {}".format(backup_path_after) # Optionally, rsync the post-recovery database. # Start monogd on the standard port. @@ -2110,7 +2283,7 @@ Examples: " {}" " {}" " start_mongod").format( - rsync_opt, standard_port, use_replica_set, rsync_cmd) + rsync_opt, standard_port, use_replica_set, rsync_cmd) ret, output = call_remote_operation( local_ops, options.remote_python, @@ -2124,16 +2297,15 @@ Examples: # Start CRUD clients host_port = "{}:{}".format(mongod_host, standard_port) - crud_procs = [] for i in xrange(options.num_crud_clients): if options.config_crud_client == with_external_server: - crud_config_file = NamedTempFile.create(suffix=".yml", dir="tmp") + crud_config_file = NamedTempFile.create(suffix=".yml", directory="tmp") crud_test_data["collectionName"] = "{}-{}".format(options.collection_name, i) new_resmoke_config( with_external_server, crud_config_file, crud_test_data, eval_str) else: crud_config_file = options.config_crud_client - _, _, proc = resmoke_client( + _, _ = resmoke_client( work_dir=mongo_repo_root_dir, mongo_path=mongo_path, host_port=host_port, @@ -2142,21 +2314,18 @@ Examples: repeat_num=100, no_wait=True, log_file="crud_{}.log".format(i)) - crud_procs.append(proc) - if crud_procs: - LOGGER.info( - "****Started %d CRUD client(s)****", options.num_crud_clients) + if options.num_crud_clients: + LOGGER.info("****Started %d CRUD client(s)****", options.num_crud_clients) # Start FSM clients - fsm_procs = [] for i in xrange(options.num_fsm_clients): - fsm_config_file = NamedTempFile.create(suffix=".yml", dir="tmp") + fsm_config_file = NamedTempFile.create(suffix=".yml", directory="tmp") fsm_test_data["dbNamePrefix"] = "fsm-{}".format(i) # Do collection validation only for the first FSM client. fsm_test_data["validateCollections"] = True if i == 0 else False new_resmoke_config(with_external_server, fsm_config_file, fsm_test_data, eval_str) - _, _, proc = resmoke_client( + _, _ = resmoke_client( work_dir=mongo_repo_root_dir, mongo_path=mongo_path, host_port=host_port, @@ -2165,9 +2334,8 @@ Examples: repeat_num=100, no_wait=True, log_file="fsm_{}.log".format(i)) - fsm_procs.append(proc) - if fsm_procs: + if options.num_fsm_clients: LOGGER.info("****Started %d FSM client(s)****", options.num_fsm_clients) # Crash the server. A pre-crash canary document is optionally written to the DB. @@ -2175,8 +2343,8 @@ Examples: if options.canary: canary_doc = {"x": time.time()} orig_canary_doc = copy.deepcopy(canary_doc) - mongo_opts = get_mongo_client_args(options) - mongo = pymongo.MongoClient(host=mongod_host, port=standard_port, **mongo_opts) + mongo = pymongo.MongoClient( + **get_mongo_client_args(options, host=mongod_host, port=standard_port)) crash_canary["function"] = mongo_insert_canary crash_canary["args"] = [ mongo, @@ -2193,7 +2361,7 @@ Examples: time.sleep(10) # Kill any running clients and cleanup temporary files. - kill_processes(crud_procs + fsm_procs) + Processes.kill_all() for temp_file in temp_client_files: NamedTempFile.delete(temp_file) @@ -2201,7 +2369,7 @@ Examples: if options.crash_method == "aws_ec2": ec2 = aws_ec2.AwsEc2() ret, aws_status = ec2.control_instance( - mode="start", image_id=instance_id, wait_time_secs=240, show_progress=True) + mode="start", image_id=instance_id, wait_time_secs=600, show_progress=True) LOGGER.info("Start instance: %d %s****", ret, aws_status) if ret: raise Exception("Start instance failed: {}".format(aws_status)) @@ -2226,6 +2394,8 @@ Examples: LOGGER.info("****Completed test loop %d test time %d seconds****", loop_num, test_time) if loop_num == options.num_loops or test_time >= options.test_time: break + + _report_json_success = True sys.exit(0) |