-rwxr-xr-x  buildscripts/launch_evergreen_ec2_instance.sh              |  17
-rw-r--r--  buildscripts/resmokeconfig/suites/with_external_server.yml |   8
-rw-r--r--  etc/evergreen.yml                                          | 151
-rw-r--r--  jstests/concurrency/fsm_libs/cluster.js                    |   5
-rw-r--r--  jstests/concurrency/fsm_libs/runner.js                     |  43
-rw-r--r--  jstests/hooks/crud_client.js                               | 145
-rw-r--r--  jstests/libs/fsm_serial_client.js                          |  17
-rwxr-xr-x  pytests/powertest.py                                       | 419
8 files changed, 672 insertions(+), 133 deletions(-)
diff --git a/buildscripts/launch_evergreen_ec2_instance.sh b/buildscripts/launch_evergreen_ec2_instance.sh
index 871130191ae..7562e5ab79a 100755
--- a/buildscripts/launch_evergreen_ec2_instance.sh
+++ b/buildscripts/launch_evergreen_ec2_instance.sh
@@ -14,20 +14,25 @@ function _usage_ {
 usage: $0 options
 This script supports the following parameters for Windows & Linux platforms:
   -k <ssh_key_id>,     [REQUIRED] The ssh key id used to access the new AWS EC2 instance.
-  -t <tag_name>,       [OPTIONAL] The tag name of the new AWS EC2 instance.
   -y <aws_ec2_yml>,    [REQUIRED] YAML file name where to store the new AWS EC2 instance
                                   information. This file will be used in etc/evergreen.yml for
                                   macro expansion of variables used in other functions.
+  -s <security_group>, [OPTIONAL] The security group to be used for the new AWS EC2 instance.
+                                  To specify more than one group, invoke this option each time.
+  -t <tag_name>,       [OPTIONAL] The tag name of the new AWS EC2 instance.
 EOF
 }
 
 # Parse command line options
-while getopts "y:k:t:?" option
+while getopts "k:s:t:y:?" option
 do
   case $option in
     k)
       ssh_key_id=$OPTARG
      ;;
+    s)
+      sec_groups="$sec_groups $OPTARG"
+      ;;
     t)
       tag_name=$OPTARG
      ;;
@@ -51,6 +56,11 @@ if [ -z $ssh_key_id ]; then
   exit 1
 fi
 
+for sec_group in $sec_groups
+do
+  security_groups="$security_groups --securityGroup $sec_group"
+done
+
 # Get the AMI information on the current host so we can launch a similar EC2 instance.
 # See http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html#instancedata-data-retrieval
 aws_metadata_url="http://169.254.169.254/latest/meta-data"
@@ -133,7 +143,8 @@ aws_ec2=$(python buildscripts/aws_ec2.py \
     --instanceType $instance_type \
     --keyName $ssh_key_id \
     --mode create \
-    --tagName "$tag_name" \
+    $security_groups \
+    --tagName "$tag_name" \
     --tagOwner "$USER" \
     $block_device_option | tr -cd "[:print:]\n")
 echo "Spawned new AMI EC2 instance: $aws_ec2"
diff --git a/buildscripts/resmokeconfig/suites/with_external_server.yml b/buildscripts/resmokeconfig/suites/with_external_server.yml
new file mode 100644
index 00000000000..a7ad1316cf7
--- /dev/null
+++ b/buildscripts/resmokeconfig/suites/with_external_server.yml
@@ -0,0 +1,8 @@
+test_kind: js_test
+
+# Use this suite to connect to a running mongod and specify resmoke.py options
+# '--shellConnPort' or '--shellConnString'.
+executor:
+  config:
+    shell_options:
+      readMode: commands
\ No newline at end of file
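The new suite is what the resmoke_client() helper added to powertest.py (further down in this patch) points resmoke.py at. A rough sketch of the equivalent manual invocation — the mongo shell path, port, and test file here are illustrative examples, not values fixed by the patch:

```python
import subprocess

# Run one JS test against an already-running mongod via the new suite file.
# The shell path, port, and test file are example values.
subprocess.check_call([
    "python", "buildscripts/resmoke.py",
    "--mongo", "./mongo",
    "--suites", "buildscripts/resmokeconfig/suites/with_external_server.yml",
    "--shellConnString", "mongodb://localhost:20000",
    "--continueOnFailure",
    "jstests/hooks/crud_client.js",
])
```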
diff --git a/etc/evergreen.yml b/etc/evergreen.yml
index ee2f832059a..0bd6879d3b6 100644
--- a/etc/evergreen.yml
+++ b/etc/evergreen.yml
@@ -146,16 +146,19 @@ variables:
 # Templates used by powercycle
 - &powercycle_remote_credentials
   private_key_file: $(${posix_workdir})/powercycle.pem
-  private_key_remote: ${kitchen_private_key}
-  aws_key_remote: ${kitchen_aws_key}
-  aws_secret_remote: ${kitchen_aws_secret}
+  private_key_remote: ${powercycle_private_key}
+  aws_key_remote: ${powercycle_aws_key}
+  aws_secret_remote: ${powercycle_aws_secret}
 
 # TODO: Remove psutil from virtualenv_requirements when BUILD-3083 is done
 - &powercycle_ec2_instance
   aws_ec2_yml: aws_ec2.yml
   remote_dir: powercycle
+  secret_port: "20001"
+  security_groups: mci powercycle_testing
   ssh_identity: -i ${private_key_file}
-  ssh_key_id: ${kitchen_ssh_key_id}
+  ssh_key_id: ${powercycle_ssh_key_id}
+  standard_port: "20000"
   virtualenv_dir: venv_powercycle
   virtualenv_requirements: psutil
 
@@ -175,10 +178,11 @@ variables:
   ec2_artifacts: ${log_path} ${backup_path_after} ${backup_path_before}
   program_options: --logLevel debug --backupPathBefore ${backup_path_before} --backupPathAfter ${backup_path_after}
   connection_options: --sshUserHost ${ip_address} --sshConnection \"${ssh_identity} ${ssh_connection_options}\"
-  test_options: --testLoops 25 --seedDocNum 10000 --rsync --validate remote --canary remote
+  test_options: --testLoops 25 --seedDocNum 10000 --rsync --validate local --canary remote
   crash_options: --crashMethod internal --crashWaitTime 30 --jitterForCrashWaitTime 5
+  client_options: --numCrudClients 5 --numFsmClients 5
   mongodb_options: --rootDir ${remote_dir}-${task_id} --mongodbBinDir ${remote_dir}
-  mongod_options: --dbPath ${db_path} --logPath ${log_path}
+  mongod_options: --mongodUsablePorts ${standard_port} ${secret_port} --dbPath ${db_path} --logPath ${log_path}
   mongod_extra_options: --mongodOptions \"--setParameter enableTestCommands=1\"
 
 #######################################
@@ -830,56 +834,67 @@ functions:
       fi
 
 "run powercycle test" :
-  command: shell.exec
-    params:
-      working_dir: src
-      script: |
-        set -o verbose
-        set -o errexit
+  - command: shell.exec
+    params:
+      working_dir: src
+      script: |
+        set -o verbose
+        set -o errexit
 
-        ${activate_virtualenv}
-        if [ ! -z "${virtualenv_requirements}" ]; then
-          easy_install ${virtualenv_requirements}
-        fi
+        ${activate_virtualenv}
+        if [ ! -z "${virtualenv_requirements}" ]; then
+          pip install ${virtualenv_requirements}
+        fi
 
-        ${set_sudo}
-        if [ ! -z $sudo ]; then
-          remote_sudo="--remoteSudo"
-        fi
+  - command: shell.exec
+    type: test
+    params:
+      working_dir: src
+      script: |
+        set -o verbose
+        set -o errexit
+
+        ${activate_virtualenv}
+
+        ${set_sudo}
+        if [ ! -z $sudo ]; then
+          remote_sudo="--remoteSudo"
+        fi
 
-        # The virtualenv bin_dir is different for Linux and Windows
-        bin_dir=$(find $VIRTUAL_ENV -name activate | sed -e "s,$VIRTUAL_ENV,,;s,activate,,;s,/,,g")
-        cmds="source ${virtualenv_dir|venv}/$bin_dir/activate"
-        cmds="$cmds; python -u"
-        # The remote python operates in a virtualenv
-        remote_python="--remotePython \"$cmds\""
+        # The virtualenv bin_dir is different for Linux and Windows
+        bin_dir=$(find $VIRTUAL_ENV -name activate | sed -e "s,$VIRTUAL_ENV,,;s,activate,,;s,/,,g")
+        cmds="source ${virtualenv_dir|venv}/$bin_dir/activate"
+        cmds="$cmds; python -u"
+        # The remote python operates in a virtualenv
+        remote_python="--remotePython \"$cmds\""
 
-        start_time=$(date +%s)
-        status="\"pass\""
-        failures=0
+        start_time=$(date +%s)
+        status="\"pass\""
+        failures=0
 
-        set +o errexit
-        eval python -u pytests/powertest.py \
-          "${connection_options} \
-           ${program_options} \
-           $remote_sudo \
-           $remote_python \
-           ${test_options} \
-           ${crash_options} \
-           ${mongodb_options} \
-           ${mongod_options} \
-           ${mongod_extra_options}"
-        exit_code=$?
+        set +o errexit
+        eval python -u pytests/powertest.py \
+          "${connection_options} \
+           ${program_options} \
+           $remote_sudo \
+           $remote_python \
+           ${test_options} \
+           ${crash_options} \
+           ${client_options} \
+           ${mongodb_options} \
+           ${mongod_options} \
+           ${mongod_extra_options}"
+        exit_code=$?
 
-        # Create report.json
-        end_time=$(date +%s)
-        elapsed_secs=$((end_time-start_time))
-        if [ $exit_code -ne 0 ]; then
-          status="\"fail\""
-          failures=1
-        fi
-        echo "{\"failures\": $failures, \"results\": [{\"status\": $status, \"exit_code\": $exit_code, \"test_file\": \"${task_name}\", \"start\": $start_time, \"end\": $end_time, \"elapsed\": $elapsed_secs}]}" > report.json
-        exit $exit_code
+        # Create report.json
+        end_time=$(date +%s)
+        elapsed_secs=$((end_time-start_time))
+        if [ $exit_code -ne 0 ]; then
+          status="\"fail\""
+          failures=1
+        fi
+        echo "{\"failures\": $failures, \"results\": [{\"status\": $status, \"exit_code\": $exit_code, \"test_file\": \"${task_name}\", \"start\": $start_time, \"end\": $end_time, \"elapsed\": $elapsed_secs}]}" > report.json
+        exit $exit_code
 
 "do multiversion setup" :
   command: shell.exec
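For reference, the report.json written above follows Evergreen's test-result schema. A minimal Python sketch producing the same payload (the task name and timings are placeholders):

```python
import json
import time

start_time = int(time.time())
exit_code = 0                  # e.g. the powertest.py return code
end_time = int(time.time())

# Same shape as the JSON the shell snippet above echoes into report.json.
report = {
    "failures": 0 if exit_code == 0 else 1,
    "results": [{
        "status": "pass" if exit_code == 0 else "fail",
        "exit_code": exit_code,
        "test_file": "powercycle",        # ${task_name} in the Evergreen expansion
        "start": start_time,
        "end": end_time,
        "elapsed": end_time - start_time,
    }],
}
with open("report.json", "w") as fh:
    json.dump(report, fh)
```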
@@ -1087,11 +1102,17 @@ functions:
       set -o errexit
 
       ${activate_virtualenv}
-      easy_install --upgrade boto3
+      pip install boto3
+
+      for security_group in ${security_groups}
+      do
+        security_groups="$security_groups -s $security_group"
+      done
 
       # Clone another instance of this host in EC2.
       buildscripts/launch_evergreen_ec2_instance.sh \
         -k ${ssh_key_id} \
+        $security_groups \
         -t "AMI Evergreen ${task_id}" \
         -y ${aws_ec2_yml}
@@ -1220,7 +1241,7 @@ functions:
       cmds="$cmds; virtualenv --python \$python_loc --system-site-packages ${virtualenv_dir|venv}"
       cmds="$cmds; activate=\$(find ${virtualenv_dir|venv} -name 'activate')"
       cmds="$cmds; source \$activate"
-      cmds="$cmds; easy_install ${virtualenv_requirements}"
+      cmds="$cmds; pip install ${virtualenv_requirements}"
       ssh_connection_options="${ssh_identity} ${ssh_connection_options}"
       ${python|/opt/mongodbtoolchain/v2/bin/python2} buildscripts/remote_operations.py \
         --verbose \
@@ -1240,7 +1261,7 @@ functions:
       if [ -z "${ec2_artifacts}" ]; then
         exit 0
       fi
-      cmd="${tar|tar} czvf ec2_artifacts.tgz ${ec2_artifacts}"
+      cmd="${tar|tar} czf ec2_artifacts.tgz ${ec2_artifacts}"
       ssh_connection_options="${ssh_identity} ${ssh_connection_options}"
       ${python|/opt/mongodbtoolchain/v2/bin/python2} buildscripts/remote_operations.py \
         --verbose \
@@ -1429,6 +1450,27 @@ post:
         display_name: Remote EC2 Artifacts - Execution ${execution}
         optional: true
 
+  # Gather and archive the local client logs.
+  - command: shell.exec
+    params:
+      working_dir: src
+      script: |
+        client_logs=$(ls crud*.log fsm*.log 2> /dev/null)
+        if [ ! -z "$client_logs" ]; then
+          ${tar|tar} czf client-logs.tgz $client_logs
+        fi
+  - command: s3.put
+    params:
+      aws_key: ${aws_key}
+      aws_secret: ${aws_secret}
+      local_file: src/client-logs.tgz
+      remote_file: mongodb-mongo-master/${build_variant}/${revision}/client_logs/mongo-client-logs-${task_id}-${execution}.tgz
+      bucket: mciuploads
+      permissions: public-read
+      content_type: ${content_type|application/x-gzip}
+      display_name: Client logs - Execution ${execution}
+      optional: true
+
   # Process and save coverage data.
   - command: shell.exec
     params:
@@ -4026,8 +4068,11 @@ tasks:
   - command: expansions.update
     <<: *powercycle_expansions
   - func: "run powercycle test"
+    # Disable the CRUD & FSM clients for mmapv1.
+    # mongod will not start if it crashed while creating a namespace (SERVER-26499).
     vars:
       <<: *powercycle_test
+      client_options: --numCrudClients 0 --numFsmClients 0
       mongod_extra_options: --mongodOptions \"--setParameter enableTestCommands=1 --storageEngine mmapv1\"
 
 - name: powercycle_WT
diff --git a/jstests/concurrency/fsm_libs/cluster.js b/jstests/concurrency/fsm_libs/cluster.js
index 931e75a0af1..451de070f9d 100644
--- a/jstests/concurrency/fsm_libs/cluster.js
+++ b/jstests/concurrency/fsm_libs/cluster.js
@@ -617,7 +617,10 @@ var Cluster = function(options) {
         var res = adminDB.runCommand({getCmdLineOpts: 1});
         assert.commandWorked(res, 'failed to get command line options');
 
-        var wiredTigerOptions = res.parsed.storage.wiredTiger || {};
+        var wiredTigerOptions = {};
+        if (res.parsed && res.parsed.storage) {
+            wiredTigerOptions = res.parsed.storage.wiredTiger || {};
+        }
         var wiredTigerCollectionConfig = wiredTigerOptions.collectionConfig || {};
         var wiredTigerConfigString = wiredTigerCollectionConfig.configString || '';
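The cluster.js guard above copes with getCmdLineOpts results that lack parsed.storage (for example, a mongod launched without explicit storage options). The same defensive read from Python with pymongo might look like this — host and port are illustrative:

```python
from pymongo import MongoClient

# Mirror the cluster.js guard: 'parsed.storage' (and everything below it)
# may be absent from getCmdLineOpts, so default every level of the lookup.
client = MongoClient("localhost", 20000)   # example host/port
res = client.admin.command("getCmdLineOpts")

parsed = res.get("parsed", {})
wt_options = parsed.get("storage", {}).get("wiredTiger", {})
config_string = wt_options.get("collectionConfig", {}).get("configString", "")
print("WiredTiger configString:", repr(config_string))
```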
diff --git a/jstests/concurrency/fsm_libs/runner.js b/jstests/concurrency/fsm_libs/runner.js
index 4997a7bf775..cb5cb79f34e 100644
--- a/jstests/concurrency/fsm_libs/runner.js
+++ b/jstests/concurrency/fsm_libs/runner.js
@@ -116,7 +116,7 @@ var runner = (function() {
     }
 
     function validateCleanupOptions(options) {
-        var allowedKeys = ['dropDatabaseBlacklist', 'keepExistingDatabases'];
+        var allowedKeys = ['dropDatabaseBlacklist', 'keepExistingDatabases', 'validateCollections'];
 
         Object.keys(options).forEach(function(option) {
             assert.contains(option,
@@ -136,6 +136,12 @@ var runner = (function() {
                       'expected keepExistingDatabases to be a boolean');
         }
 
+        options.validateCollections =
+            options.hasOwnProperty('validateCollections') ? options.validateCollections : true;
+        assert.eq('boolean',
+                  typeof options.validateCollections,
+                  'expected validateCollections to be a boolean');
+
         return options;
     }
 
@@ -441,7 +447,8 @@ var runner = (function() {
         jsTest.log('End of schedule');
     }
 
-    function cleanupWorkload(workload, context, cluster, errors, header, dbHashBlacklist) {
+    function cleanupWorkload(
+        workload, context, cluster, errors, header, dbHashBlacklist, cleanupOptions) {
         // Returns true if the workload's teardown succeeds and false if the workload's
         // teardown fails.
 
@@ -458,7 +465,9 @@ var runner = (function() {
         }
 
         try {
-            cluster.validateAllCollections(phase);
+            if (cleanupOptions.validateCollections) {
+                cluster.validateAllCollections(phase);
+            }
         } catch (e) {
             errors.push(new WorkloadFailure(
                 e.toString(), e.stack, 'main', header + ' validating collections'));
@@ -509,7 +518,8 @@ var runner = (function() {
                               errors,
                               maxAllowedThreads,
                               dbHashBlacklist,
-                              configServerData) {
+                              configServerData,
+                              cleanupOptions) {
         var cleanup = [];
         var teardownFailed = false;
         var startTime = Date.now();  // Initialize in case setupWorkload fails below.
@@ -588,9 +598,13 @@ var runner = (function() {
         } finally {
             // Call each foreground workload's teardown function. After all teardowns have completed
            // check if any of them failed.
-            var cleanupResults =
-                cleanup.map(workload => cleanupWorkload(
-                                workload, context, cluster, errors, 'Foreground', dbHashBlacklist));
+            var cleanupResults = cleanup.map(workload => cleanupWorkload(workload,
+                                                                         context,
+                                                                         cluster,
+                                                                         errors,
+                                                                         'Foreground',
+                                                                         dbHashBlacklist,
+                                                                         cleanupOptions));
             teardownFailed = cleanupResults.some(success => (success === false));
 
             totalTime = Date.now() - startTime;
@@ -622,8 +636,8 @@ var runner = (function() {
         validateExecutionOptions(executionMode, executionOptions);
         Object.freeze(executionOptions);  // immutable after validation (and normalization)
 
-        Object.freeze(cleanupOptions);  // immutable prior to validation
         validateCleanupOptions(cleanupOptions);
+        Object.freeze(cleanupOptions);  // immutable after validation (and normalization)
 
         if (executionMode.composed) {
             clusterOptions.sameDB = true;
@@ -743,7 +757,8 @@ var runner = (function() {
                                   errors,
                                   maxAllowedThreads,
                                   dbHashBlacklist,
-                                  configServerData);
+                                  configServerData,
+                                  cleanupOptions);
             });
         } finally {
             // Set a flag so background threads know to terminate.
@@ -755,9 +770,13 @@ var runner = (function() {
         } finally {
             try {
                 // Call each background workload's teardown function.
-                bgCleanup.forEach(
-                    bgWorkload => cleanupWorkload(
-                        bgWorkload, bgContext, cluster, errors, 'Background', dbHashBlacklist));
+                bgCleanup.forEach(bgWorkload => cleanupWorkload(bgWorkload,
+                                                                bgContext,
+                                                                cluster,
+                                                                errors,
+                                                                'Background',
+                                                                dbHashBlacklist,
+                                                                cleanupOptions));
 
                 // TODO: Call cleanupWorkloadData() on background workloads here if no background
                 // workload teardown functions fail.
diff --git a/jstests/hooks/crud_client.js b/jstests/hooks/crud_client.js
new file mode 100644
index 00000000000..a20cce5da69
--- /dev/null
+++ b/jstests/hooks/crud_client.js
@@ -0,0 +1,145 @@
+// Basic CRUD client to provide load for Powercycle testing.
+
+'use strict';
+
+function randString(maxLength) {
+    maxLength = maxLength || 1024;
+    const randChars = "ABCDEFGHIKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
+    var rString = "";
+    for (var i = 0; i < Random.randInt(maxLength); i++) {
+        rString += randChars.charAt(Random.randInt(randChars.length));
+    }
+    return rString;
+}
+
+function weightedChoice(choices) {
+    var total = 0;
+    for (var choice in choices) {
+        total += choices[choice];
+    }
+    var ran = Random.randInt(total);
+    var upto = 0;
+    for (choice in choices) {
+        var weight = choices[choice];
+        if (upto + weight >= ran) {
+            return choice;
+        }
+        upto += weight;
+    }
+}
+
+var operations = {
+    "bulk insert": 15,
+    "count": 20,
+    "find": 15,
+    "remove multi": 15,
+    "remove one": 15,
+    "upsert one": 15,
+    "upsert multi": 5
+};
+
+Random.setRandomSeed();
+
+if (typeof TestData === "undefined") {
+    TestData = {};
+}
+var dbName = TestData.dbName || "test";
+var collectionName = TestData.collectionName || "crud";
+var bulkNum = TestData.bulkNum || 1000;
+var baseNum = TestData.baseNum || 100000;
+// Set numLoops <= 0 to have an infinite loop.
+var numLoops = TestData.numLoops || 0;
+
+print("****Starting CRUD client, namespace", dbName, collectionName, "numLoops", numLoops, "****");
+
+var coll = db.getSiblingDB(dbName)[collectionName];
+var writeConcern = TestData.writeConcern || {};
+if (Object.keys(writeConcern).length) {
+    coll.setWriteConcern(writeConcern);
+}
+
+var readConcern = TestData.readConcern || {};
+var noReadConcern = true;
+if (Object.keys(readConcern).length) {
+    noReadConcern = false;
+}
+
+coll.createIndex({x: 1});
+
+var shouldLoopForever = numLoops <= 0;
+while (shouldLoopForever || numLoops > 0) {
+    if (!shouldLoopForever) {
+        numLoops -= 1;
+    }
+
+    var info = db.hostInfo();
+    var serverStatus = db.serverStatus();
+    print("(" + collectionName + ") dbHostInfo status:",
+          info.ok,
+          serverStatus.version,
+          "uptime:",
+          serverStatus.uptime);
+
+    var match = Random.randInt(baseNum);
+    var matchQuery = {$gte: match, $lt: match + (baseNum * 0.01)};
+
+    var operation = weightedChoice(operations);
+
+    if (operation == "upsert multi") {
+        var updateOpts = {upsert: true, multi: true};
+        print("(" + collectionName + ") Upsert multi docs",
+              tojsononeline(matchQuery),
+              tojsononeline(updateOpts),
+              tojsononeline(coll.update(
+                  {x: matchQuery}, {$inc: {x: baseNum}, $set: {n: "hello"}}, updateOpts)));
+    } else if (operation == "upsert one") {
+        var updateOpts = {upsert: true, multi: false};
+        print("(" + collectionName + ") Upsert single doc",
+              match,
+              tojsononeline(updateOpts),
+              tojsononeline(
+                  coll.update({x: match}, {$inc: {x: baseNum}, $set: {n: "hello"}}, updateOpts)));
+    } else if (operation == "bulk insert") {
+        var bulk = coll.initializeUnorderedBulkOp();
+        for (var i = 0; i < bulkNum; i++) {
+            bulk.insert({x: (match + i) % baseNum, doc: randString()});
+        }
+        print("(" + collectionName + ") Bulk insert",
+              bulkNum,
+              "docs",
+              tojsononeline(writeConcern),
+              tojsononeline(bulk.execute(writeConcern)));
+    } else if (operation == "count") {
+        var countOpts = {count: collectionName, query: {x: matchQuery}};
+        if (!noReadConcern) {
+            countOpts.readConcern = readConcern;
+        }
+        print("(" + collectionName + ") Count docs",
+              tojsononeline(matchQuery),
+              tojsononeline(readConcern),
+              tojsononeline(db.runCommand(countOpts)));
+    } else if (operation == "find") {
+        var findOpts = {find: collectionName, singleBatch: true, filter: {x: matchQuery}};
+        if (!noReadConcern) {
+            findOpts.readConcern = readConcern;
+        }
+        print("(" + collectionName + ") Find docs",
+              tojsononeline(matchQuery),
+              tojsononeline(readConcern),
+              "find status",
+              db.runCommand(findOpts).ok);
+    } else if (operation == "remove multi") {
+        var removeOpts = {};
+        var removeQuery = {x: matchQuery, justOne: false};
+        print("(" + collectionName + ") Remove docs",
+              tojsononeline(removeQuery),
+              tojsononeline(removeOpts),
+              tojsononeline(coll.remove(removeQuery, removeOpts)));
+    } else if (operation == "remove one") {
+        var removeOpts = {};
+        var removeQuery = {x: match, justOne: true};
+        print("(" + collectionName + ") Remove docs",
+              tojsononeline(removeQuery),
+              tojsononeline(removeOpts),
+              tojsononeline(coll.remove(removeQuery, removeOpts)));
+    }
+}
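weightedChoice() above draws an operation name with probability proportional to its weight. A rough Python equivalent of the same selection logic, written with the conventional strict-inequality bound:

```python
import random

def weighted_choice(choices):
    """Return a key from 'choices' with probability proportional to its weight."""
    total = sum(choices.values())
    ran = random.randrange(total)        # uniform in [0, total)
    upto = 0
    for choice, weight in choices.items():
        if ran < upto + weight:
            return choice
        upto += weight

operations = {"bulk insert": 15, "count": 20, "find": 15, "remove multi": 15,
              "remove one": 15, "upsert one": 15, "upsert multi": 5}
print(weighted_choice(operations))
```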
diff --git a/jstests/libs/fsm_serial_client.js b/jstests/libs/fsm_serial_client.js
new file mode 100644
index 00000000000..6aa246c0abc
--- /dev/null
+++ b/jstests/libs/fsm_serial_client.js
@@ -0,0 +1,17 @@
+// This is the template file used in Powercycle testing for launching FSM Serial clients.
+
+'use strict';
+
+load('jstests/concurrency/fsm_libs/runner.js');
+
+var workloadDir = 'jstests/concurrency/fsm_workloads';
+
+var workloadList = TestData.workloadFiles || ls(workloadDir);
+var dbNamePrefix = TestData.dbNamePrefix || '';
+var fsmDbBlacklist = TestData.fsmDbBlacklist || [];
+var validateCollectionsOnCleanup = TestData.validateCollections;
+
+runWorkloadsSerially(workloadList, {}, {dbNamePrefix: dbNamePrefix}, {
+    keepExistingDatabases: true,
+    dropDatabaseBlacklist: fsmDbBlacklist,
+    validateCollections: validateCollectionsOnCleanup
+});
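Both client scripts take their knobs from TestData, which powertest.py injects by overlaying a global_vars block on the with_external_server suite (see new_resmoke_config in the powertest.py diff below). A sketch of the override document it writes — the field values here are examples:

```python
import yaml

# TestData fields consumed by fsm_serial_client.js; values are illustrative.
test_data = {
    "dbNamePrefix": "fsm-0",
    "fsmDbBlacklist": ["powercycle"],   # keep FSM workloads off the CRUD test DB
    "validateCollections": True,        # per the patch, only the first FSM client validates
}

override = {
    "executor": {
        "config": {
            "shell_options": {
                "global_vars": {"TestData": test_data}
            }
        }
    }
}
print(yaml.safe_dump(override))
```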
diff --git a/pytests/powertest.py b/pytests/powertest.py
index 49ce400d589..be18ebeabe2 100755
--- a/pytests/powertest.py
+++ b/pytests/powertest.py
@@ -2,11 +2,12 @@
 
 """Powercycle test
 
-Tests robustness of mongod to survice multiple powercycle events.
+Tests robustness of mongod to survive multiple powercycle events.
 """
 
 from __future__ import print_function
 
+import atexit
 import collections
 import copy
 import datetime
@@ -101,6 +102,36 @@ LOGGER = logging.getLogger(__name__)
 This script will either download a MongoDB tarball or use an existing setup.
 """
 
 
+def exit_handler():
+    """Exit handler, deletes all named temporary files."""
+    LOGGER.debug("Exit handler invoked, cleaning up temporary files")
+    try:
+        NamedTempFile.delete_all()
+    except:
+        pass
+
+
+def kill_processes(pids, kill_children=True):
+    """Kill a list of processes and optionally their children."""
+    for pid in pids:
+        LOGGER.debug("Killing process with pid %d", pid)
+        try:
+            proc = psutil.Process(pid)
+        except psutil.NoSuchProcess:
+            LOGGER.error("Could not kill process with pid %d, as it no longer exists", pid)
+            continue
+        if kill_children:
+            child_procs = proc.children(recursive=True)
+            child_pids = []
+            for child in child_procs:
+                child_pids.append(child.pid)
+            kill_processes(child_pids, kill_children=False)
+        try:
+            proc.kill()
+        except psutil.NoSuchProcess:
+            LOGGER.error("Could not kill process with pid %d, as it no longer exists", pid)
+
+
 def get_extension(filename):
     """Returns the extension of a file."""
     return os.path.splitext(filename)[-1]
@@ -140,40 +171,73 @@ def executable_exists_in_path(executable):
     return distutils.spawn.find_executable(executable) is not None
 
 
+def create_temp_executable_file(cmds):
+    """Creates an executable temporary file containing 'cmds'. Returns file name."""
+    temp_file_name = NamedTempFile.create(suffix=".sh")
+    with NamedTempFile.get(temp_file_name) as temp_file:
+        temp_file.write(cmds)
+    os_st = os.stat(temp_file_name)
+    os.chmod(temp_file_name, os_st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
+    return temp_file_name
+
+
+def start_cmd(cmd, use_file=False):
+    """Starts command and returns pid from Popen"""
+
+    orig_cmd = ""
+    # Multi-commands need to be written to a temporary file to execute on Windows.
+    # This is due to complications with invoking Bash in Windows.
+    if use_file:
+        orig_cmd = cmd
+        temp_file = create_temp_executable_file(cmd)
+        # The temporary file name will have '\' on Windows and needs to be converted to '/'.
+        cmd = "bash -c {}".format(temp_file.replace("\\", "/"))
+
+    # If 'cmd' is specified as a string, convert it to a list of strings.
+    if isinstance(cmd, str):
+        cmd = shlex.split(cmd)
+
+    if use_file:
+        LOGGER.debug("Executing '%s', tempfile contains: %s", cmd, orig_cmd)
+    else:
+        LOGGER.debug("Executing '%s'", cmd)
+
+    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+
+    return proc.pid
+
+
 def execute_cmd(cmd, use_file=False):
-    """Executes command and returns return_code and output from command"""
+    """Executes command and returns return_code, output from command"""
 
     orig_cmd = ""
     # Multi-commands need to be written to a temporary file to execute on Windows.
     # This is due to complications with invoking Bash in Windows.
     if use_file:
         orig_cmd = cmd
-        with tempfile.NamedTemporaryFile(suffix=".sh", delete=False) as temp_file:
-            temp_file.write(cmd)
-            os_st = os.stat(temp_file.name)
-            os.chmod(temp_file.name, os_st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
+        temp_file = create_temp_executable_file(cmd)
         # The temporary file name will have '\' on Windows and needs to be converted to '/'.
-        cmd = "bash -c {}".format(temp_file.name.replace("\\", "/"))
+        cmd = "bash -c {}".format(temp_file.replace("\\", "/"))
 
     # If 'cmd' is specified as a string, convert it to a list of strings.
     if isinstance(cmd, str):
         cmd = shlex.split(cmd)
 
     if use_file:
-        LOGGER.info("Executing '%s', tempfile contains: %s", cmd, orig_cmd)
+        LOGGER.debug("Executing '%s', tempfile contains: %s", cmd, orig_cmd)
     else:
-        LOGGER.info("Executing '%s'", cmd)
+        LOGGER.debug("Executing '%s'", cmd)
 
     try:
-        proc = subprocess.Popen(
-            cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
         output, _ = proc.communicate()
         error_code = proc.returncode
         if error_code:
            output = "Error executing cmd {}: {}".format(cmd, output)
     finally:
         if use_file:
-            os.remove(temp_file.name)
+            os.remove(temp_file)
 
     return error_code, output
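start_cmd() is the fire-and-forget sibling of execute_cmd(): it returns the child's pid instead of blocking in communicate(), which is what lets the harness keep CRUD/FSM clients running while it crashes the server. An illustrative pairing, assuming the helpers above are in scope (the commands are arbitrary examples):

```python
# Blocking: run to completion and collect the output.
ret, output = execute_cmd("echo hello", use_file=True)

# Detached: remember the pid so the process can be reaped later.
pid = start_cmd("sleep 60", use_file=True)
# ... crash and restart the server here ...
kill_processes([pid])   # also kills any children the client spawned
```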
@@ -202,23 +266,44 @@ def parse_options(options):
     return options_map
 
 
-def download_file(url, file_name):
+def download_file(url, file_name, download_retries=5):
     """Returns True if download was successful. Raises error if download fails."""
     LOGGER.info("Downloading %s to %s", url, file_name)
 
-    with requests.Session() as session:
-        adapter = requests.adapters.HTTPAdapter(max_retries=5)
-        session.mount(url, adapter)
-        response = session.get(url, stream=True)
-        response.raise_for_status()
-
-        with open(file_name, "wb") as file_handle:
-            for block in response.iter_content(1024):
-                file_handle.write(block)
+    while download_retries > 0:
 
-        adapter.close()
+        with requests.Session() as session:
+            adapter = requests.adapters.HTTPAdapter(max_retries=download_retries)
+            session.mount(url, adapter)
+            response = session.get(url, stream=True)
+            response.raise_for_status()
 
-    return True
+            with open(file_name, "wb") as file_handle:
+                try:
+                    for block in response.iter_content(1024 * 1000):
+                        file_handle.write(block)
+                except requests.exceptions.ChunkedEncodingError as err:
+                    download_retries -= 1
+                    if download_retries == 0:
+                        raise Exception("Incomplete download for URL {}: {}".format(url, err))
+                    continue
+
+        # Check if file download was completed.
+        if "Content-length" in response.headers:
+            url_content_length = int(response.headers["Content-length"])
+            file_size = os.path.getsize(file_name)
+            # Retry download if file_size has an unexpected size.
+            if url_content_length != file_size:
+                download_retries -= 1
+                if download_retries == 0:
+                    raise Exception("Downloaded file size ({} bytes) doesn't match content length"
+                                    " ({} bytes) for URL {}".format(
+                                        file_size, url_content_length, url))
+                continue
+
+        return True
+
+    raise Exception("Unknown download problem for {} to file {}".format(url, file_name))
 
 
 def install_tarball(tarball, root_dir):
@@ -361,7 +446,7 @@ def install_mongod(bin_dir=None, tarball_url="latest", root_dir=None):
         # Symlink the bin dir from the tarball to 'root_bin_dir'.
         # Since get_bin_dir returns an absolute path, we need to remove 'root_dir'
         tarball_bin_dir = get_bin_dir(root_dir).replace("{}/".format(root_dir), "")
-        LOGGER.info("Symlink %s to %s", tarball_bin_dir, root_bin_dir)
+        LOGGER.debug("Symlink %s to %s", tarball_bin_dir, root_bin_dir)
         symlink_dir(tarball_bin_dir, root_bin_dir)
 
 
@@ -380,6 +465,40 @@ def call_remote_operation(local_ops, remote_python, script_name, client_args, op
     return ret, output
 
 
+class NamedTempFile(object):
+    """Class to control temporary files."""
+
+    _FILE_MAP = {}
+
+    @classmethod
+    def create(cls, suffix=""):
+        """Creates a temporary file and returns the file name."""
+        temp_file = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
+        cls._FILE_MAP[temp_file.name] = temp_file
+        return temp_file.name
+
+    @classmethod
+    def get(cls, name):
+        """Gets temporary file object. Raises an exception if the file is unknown."""
+        if name not in cls._FILE_MAP:
+            raise Exception("Unknown temporary file {}.".format(name))
+        return cls._FILE_MAP[name]
+
+    @classmethod
+    def delete(cls, name):
+        """Deletes temporary file. Raises an exception if the file is unknown."""
+        if name not in cls._FILE_MAP:
+            raise Exception("Unknown temporary file {}.".format(name))
+        os.remove(name)
+        del cls._FILE_MAP[name]
+
+    @classmethod
+    def delete_all(cls):
+        """Deletes all temporary files."""
+        for name in list(cls._FILE_MAP):
+            cls.delete(name)
+
+
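NamedTempFile keeps every temporary file it hands out in a class-level map, so the atexit hook registered in main() (exit_handler, defined near the top of the file) can reap leftovers from a failed run. A short usage sketch, assuming the class and hook above are in scope:

```python
import atexit

atexit.register(exit_handler)   # delete_all() runs even on abnormal exit

name = NamedTempFile.create(suffix=".yml")
with NamedTempFile.get(name) as fh:
    fh.write("test_kind: js_test\n")
NamedTempFile.delete(name)      # explicit cleanup; anything else is caught at exit
```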
 class ProcessControl(object):
     """ Process control class.
@@ -430,14 +549,15 @@ class ProcessControl(object):
                 return True
         return False
 
-    def terminate(self):
-        """ Terminates all running processes that match the list of pids. """
+    def kill(self):
+        """ Kills all running processes that match the list of pids. """
         if self.is_running():
             for proc in self.get_procs():
                 try:
-                    proc.terminate()
+                    proc.kill()
                 except psutil.NoSuchProcess:
-                    LOGGER.info("Could not terminate pid %d, process no longer exists", proc.pid)
+                    LOGGER.info("Could not kill process with pid %d, as it no longer exists",
+                                proc.pid)
 
 
 class WindowsService(object):
@@ -611,7 +731,7 @@ class PosixService(object):
     def stop(self):
         """ Stop process. Returns (code, output) tuple. """
         proc = ProcessControl(name=self.bin_name)
-        proc.terminate()
+        proc.kill()
         self.pids = []
         return 0, None
 
@@ -907,11 +1027,7 @@ def internal_crash(use_sudo=False):
     # Windows does not have a way to immediately crash itself. It's
     # better to use an external mechanism instead.
     if _IS_WINDOWS:
-        # Sleep after issuing shutdown, to prevent the 'client' side script
-        # continuing, as shutdown is no immediate.
-        cmds = """
-            shutdown /r /f /t 0 ;
-            sleep 10"""
+        cmds = "shutdown /r /f /t 0"
         ret, output = execute_cmd(cmds, use_file=True)
         return ret, output
     else:
@@ -937,7 +1053,7 @@ def internal_crash(use_sudo=False):
     return 1, "Crash did not occur"
 
 
-def crash_server(options, crash_canary, local_ops, script_name, client_args):
+def crash_server(options, crash_canary, canary_port, local_ops, script_name, client_args):
     """ Crashes server and optionally writes canary doc before crash. """
 
     crash_wait_time = options.crash_wait_time + random.randint(0, options.crash_wait_time_jitter)
@@ -962,7 +1078,8 @@ def crash_server(options, crash_canary, local_ops, script_name, client_args):
         if options.canary == "remote":
             # The crash canary function executes remotely, only if the
             # crash_method is 'internal'.
-            canary = "--docForCanary \"{}\"".format(crash_canary["args"][3])
+            canary = "--mongodPort {} --docForCanary \"{}\"".format(
+                canary_port, crash_canary["args"][3])
             canary_cmd = "insert_canary"
         else:
             canary = ""
@@ -1011,7 +1128,7 @@ def get_mongo_client_args(options):
     """ Returns keyword arg dict used in PyMongo client. """
     mongo_args = {}
     # Set the writeConcern
-    mongo_args = options.write_concern
+    mongo_args = yaml.safe_load(options.write_concern)
     # Set the readConcernLevel
     if options.read_concern_level:
         mongo_args["readConcernLevel"] = options.read_concern_level
@@ -1146,9 +1263,58 @@ def mongo_insert_canary(mongo, db_name, coll_name, doc):
     return 0 if res.inserted_id else 1
 
 
+def new_resmoke_config(config_file, new_config_file, test_data):
+    """ Creates 'new_config_file', from 'config_file', with an update from 'test_data'. """
+    new_config = {
+        "executor": {
+            "config": {
+                "shell_options": {
+                    "global_vars": {
+                        "TestData": test_data
+                    }
+                }
+            }
+        }
+    }
+    with open(config_file, "r") as yaml_stream:
+        config = yaml.load(yaml_stream)
+    config.update(new_config)
+    with open(new_config_file, "w") as yaml_stream:
+        yaml.safe_dump(config, yaml_stream)
+
+
+def resmoke_client(work_dir,
+                   mongo_path,
+                   host_port,
+                   js_test,
+                   resmoke_suite,
+                   no_wait=False,
+                   log_file=None):
+    """Starts resmoke client from work_dir, connecting to host_port and executes js_test."""
+    log_output = "2>&1 | tee -a {}".format(log_file) if log_file else ""
+    cmds = ("cd {} ; "
+            "python buildscripts/resmoke.py "
+            "--mongo {} "
+            "--suites {} "
+            "--shellConnString mongodb://{} "
+            "--continueOnFailure "
+            "{} "
+            "{}".format(
+                work_dir, mongo_path, resmoke_suite, host_port, js_test, log_output))
+    ret, output, pid = None, None, None
+    if no_wait:
+        pid = start_cmd(cmds, use_file=True)
+    else:
+        ret, output = execute_cmd(cmds, use_file=True)
+    return ret, output, pid
+
+
 def main():
     """ Main program. """
+
+    atexit.register(exit_handler)
+
     parser = optparse.OptionParser(usage="""
 %prog [options]
@@ -1179,6 +1345,7 @@ Examples:
     crash_options = optparse.OptionGroup(parser, "Crash Options")
     mongodb_options = optparse.OptionGroup(parser, "MongoDB Options")
     mongod_options = optparse.OptionGroup(parser, "mongod Options")
+    client_options = optparse.OptionGroup(parser, "Client Options")
     program_options = optparse.OptionGroup(parser, "Program Options")
 
     # Test options
@@ -1199,18 +1366,6 @@ Examples:
                             " which are added to '{}'".format(default_ssh_connection_options),
                             default=None)
 
-    test_options.add_option("--mongoPath",
-                            dest="mongo_path",
-                            help="Path to mongo (shell) executable, if unspecified, mongo client"
-                            " is launched from $PATH",
-                            default="mongo")
-
-    test_options.add_option("--mongoRepoRootDir",
-                            dest="mongo_repo_root_dir",
-                            help="Root directory of mongoDB repository, defaults to current"
-                            " directory.",
-                            default=None)
-
     test_options.add_option("--testLoops",
                             dest="num_loops",
                             help="Number of powercycle loops to run [default: %default]",
@@ -1400,6 +1555,60 @@ Examples:
                                " 'source venv/bin/activate; python'",
                                default="python")
 
+    # Client options
+    mongo_path = distutils.spawn.find_executable(
+        "mongo", os.getcwd() + os.pathsep + os.environ["PATH"])
+    if mongo_path:
+        mongo_path = os.path.abspath(mongo_path)
+    client_options.add_option("--mongoPath",
+                              dest="mongo_path",
+                              help="Path to mongo (shell) executable, if unspecified, mongo client"
+                              " is launched from the current directory.",
+                              default=mongo_path)
+
+    client_options.add_option("--mongoRepoRootDir",
+                              dest="mongo_repo_root_dir",
+                              help="Root directory of mongoDB repository, defaults to current"
+                              " directory.",
+                              default=None)
+
+    client_options.add_option("--crudClient",
+                              dest="crud_client",
+                              help="The path to the CRUD client script on the local host"
+                              " [default: '%default'].",
+                              default="jstests/hooks/crud_client.js")
+
+    client_options.add_option("--configCrudClient",
+                              dest="config_crud_client",
+                              help="The path to the CRUD client configuration YML file on the"
+                              " local host. This is the resmoke.py suite file. If"
+                              " unspecified, a default configuration will be used that"
+                              " provides a mongo (shell) DB connection to a running mongod.",
+                              default=None)
+
+    client_options.add_option("--numCrudClients",
+                              dest="num_crud_clients",
+                              help="The number of concurrent CRUD clients to run"
+                              " [default: '%default'].",
+                              type="int",
+                              default=1)
+
+    client_options.add_option("--numFsmClients",
+                              dest="num_fsm_clients",
+                              help="The number of concurrent FSM clients to run"
+                              " [default: '%default'].",
+                              type="int",
+                              default=0)
+
+    client_options.add_option("--fsmWorkloadFiles",
+                              dest="fsm_workload_files",
+                              help="A list of the FSM workload files to execute. More than one"
+                              " file can be specified either in a comma-delimited string,"
+                              " or by specifying this option more than once. If unspecified,"
+                              " then all FSM workload files are executed.",
+                              action="append",
+                              default=[])
+
     # Program options
     program_options.add_option("--remoteSudo",
                                dest="remote_sudo",
@@ -1458,6 +1667,7 @@ Examples:
 
     parser.add_option_group(test_options)
     parser.add_option_group(crash_options)
+    parser.add_option_group(client_options)
     parser.add_option_group(mongodb_options)
     parser.add_option_group(mongod_options)
     parser.add_option_group(program_options)
@@ -1486,7 +1696,7 @@ Examples:
     mongod_options_map = parse_options(options.mongod_options)
 
     # Error out earlier if these options are not properly specified
-    options.write_concern = yaml.safe_load(options.write_concern)
+    write_concern = yaml.safe_load(options.write_concern)
     options.canary_doc = yaml.safe_load(options.canary_doc)
 
     # Invoke remote_handler if remote_operation is specified.
@@ -1511,19 +1721,41 @@ Examples:
         rsync_cmd = ""
         rsync_opt = ""
 
-    # Setup the mongo_repo_root
-    mongo_repo_root_dir = "." if not options.mongo_repo_root_dir else options.mongo_repo_root_dir
+    # Setup the mongo client, mongo_path is required if there are local clients.
+    if (options.num_crud_clients > 0 or
+            options.num_fsm_clients > 0 or
+            options.validate_collections == "local"):
+        if options.mongo_path is None:
+            LOGGER.error("mongoPath must be specified")
+            sys.exit(1)
+        elif not os.path.isfile(options.mongo_path):
+            LOGGER.error("mongoPath %s does not exist", options.mongo_path)
+            sys.exit(1)
+
+    # Setup the CRUD & FSM clients.
+    with_external_server = "buildscripts/resmokeconfig/suites/with_external_server.yml"
+    config_crud_client = options.config_crud_client
+    fsm_client = "jstests/libs/fsm_serial_client.js"
+    fsm_workload_files = []
+    for fsm_workload_file in options.fsm_workload_files:
+        fsm_workload_files += fsm_workload_file.replace(" ", "").split(",")
+
+    # Setup the mongo_repo_root.
+    if options.mongo_repo_root_dir:
+        mongo_repo_root_dir = options.mongo_repo_root_dir
+    else:
+        mongo_repo_root_dir = os.getcwd()
     if not os.path.isdir(mongo_repo_root_dir):
         LOGGER.error("mongoRepoRoot %s does not exist", mongo_repo_root_dir)
         sys.exit(1)
 
-    # Setup the validate_collections option
+    # Setup the validate_collections option.
     if options.validate_collections == "remote":
         validate_collections_cmd = "validate_collections"
     else:
         validate_collections_cmd = ""
 
-    # Setup the validate_canary option
+    # Setup the validate_canary option.
     if options.canary and "nojournal" in mongod_options_map:
         LOGGER.error("Cannot create and validate canary documents if the mongod option"
                      " '--nojournal' is used.")
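The pattern the validation and client-startup code below follows can be summarized in a few lines: derive a throwaway suite from with_external_server.yml, run a shell client through it, clean up. A sketch combining the helpers defined earlier in this patch, with example paths, port, and test:

```python
# Derive a one-off suite carrying TestData, then run a JS test through it
# against a running mongod (paths, port, and test are example values).
config_file = NamedTempFile.create(suffix=".yml")
new_resmoke_config(
    "buildscripts/resmokeconfig/suites/with_external_server.yml",
    config_file,
    {"dbName": "powercycle", "collectionName": "crud-0"})

_, _, pid = resmoke_client(
    work_dir=".",
    mongo_path="./mongo",
    host_port="localhost:20000",
    js_test="jstests/hooks/crud_client.js",
    resmoke_suite=config_file,
    no_wait=True,               # detach; remember the pid
    log_file="crud_0.log")

# ... later, once the loop is done with this client ...
kill_processes([pid])
NamedTempFile.delete(config_file)
```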
@@ -1666,12 +1898,17 @@ Examples:
         # Optionally, run local validation of collections.
         if options.validate_collections == "local":
-            cmds = """
-                TestData = {};
-                TestData.skipValidationOnNamespaceNotFound = true;
-                load("jstests/hooks/run_validate_collections.js");"""
             host_port = "{}:{}".format(mongod_host, secret_port)
-            ret, output = mongo_shell(options.mongo_path, mongo_repo_root_dir, host_port, cmds)
+            new_config_file = NamedTempFile.create(suffix=".yml")
+            test_data = {"skipValidationOnNamespaceNotFound": True}
+            new_resmoke_config(with_external_server, new_config_file, test_data)
+            ret, output, _ = resmoke_client(
+                mongo_repo_root_dir,
+                options.mongo_path,
+                host_port,
+                "jstests/hooks/run_validate_collections.js",
+                new_config_file)
+            NamedTempFile.delete(new_config_file)
             LOGGER.info("Collection validation: %d %s", ret, output)
             if ret:
                 sys.exit(ret)
@@ -1721,7 +1958,57 @@ Examples:
             if ret:
                 sys.exit(ret)
 
-        # TODO SERVER-30802: Add CRUD & FSM clients
+        # Start CRUD clients
+        crud_pids = []
+        if options.num_crud_clients > 0:
+            host_port = "{}:{}".format(mongod_host, standard_port)
+            test_data = {"dbName": options.db_name}
+            if options.read_concern_level:
+                test_data["readConcern"] = {"level": options.read_concern_level}
+            if write_concern:
+                test_data["writeConcern"] = write_concern
+
+            for i in xrange(options.num_crud_clients):
+                crud_config_file = NamedTempFile.create(suffix=".yml")
+                test_data["collectionName"] = "{}-{}".format(options.collection_name, i)
+                new_resmoke_config(with_external_server, crud_config_file, test_data)
+                _, _, pid = resmoke_client(
+                    work_dir=mongo_repo_root_dir,
+                    mongo_path=options.mongo_path,
+                    host_port=host_port,
+                    js_test=options.crud_client,
+                    resmoke_suite=crud_config_file,
+                    no_wait=True,
+                    log_file="crud_{}.log".format(i))
+                crud_pids.append(pid)
+
+            LOGGER.info(
+                "****Started %d CRUD client(s) %s****", options.num_crud_clients, crud_pids)
+
+        # Start FSM clients
+        fsm_pids = []
+        if options.num_fsm_clients > 0:
+            test_data = {"fsmDbBlacklist": [options.db_name]}
+            if fsm_workload_files:
+                test_data["workloadFiles"] = fsm_workload_files
+
+            for i in xrange(options.num_fsm_clients):
+                fsm_config_file = NamedTempFile.create(suffix=".yml")
+                test_data["dbNamePrefix"] = "fsm-{}".format(i)
+                # Do collection validation only for the first FSM client.
+                test_data["validateCollections"] = True if i == 0 else False
+                new_resmoke_config(with_external_server, fsm_config_file, test_data)
+                _, _, pid = resmoke_client(
+                    work_dir=mongo_repo_root_dir,
+                    mongo_path=options.mongo_path,
+                    host_port=host_port,
+                    js_test=fsm_client,
+                    resmoke_suite=fsm_config_file,
+                    no_wait=True,
+                    log_file="fsm_{}.log".format(i))
+                fsm_pids.append(pid)
+
+            LOGGER.info("****Started %d FSM client(s) %s****", options.num_fsm_clients, fsm_pids)
 
         # Crash the server. A pre-crash canary document is optionally written to the DB.
         crash_canary = {}
@@ -1729,17 +2016,21 @@ Examples:
             canary_doc = {"x": time.time()}
             orig_canary_doc = copy.deepcopy(canary_doc)
             mongo_opts = get_mongo_client_args(options)
-            mongo = pymongo.MongoClient(
-                host=mongod_host, port=standard_port, **mongo_opts)
+            mongo = pymongo.MongoClient(host=mongod_host, port=standard_port, **mongo_opts)
             crash_canary["function"] = mongo_insert_canary
             crash_canary["args"] = [
                 mongo, options.db_name, options.collection_name, canary_doc]
 
-        crash_server(options, crash_canary, local_ops, script_name, client_args)
+        crash_server(options, crash_canary, standard_port, local_ops, script_name, client_args)
+        # Wait a bit after sending the command to crash the server, to avoid connecting to
+        # the server before the actual crash occurs.
+        time.sleep(10)
 
         canary_doc = copy.deepcopy(orig_canary_doc)
 
+        kill_processes(crud_pids + fsm_pids)
+
         test_time = int(time.time()) - start_time
         LOGGER.info("****Completed test loop %d test time %d seconds****", loop_num, test_time)
         if loop_num == options.num_loops or test_time >= options.test_time: