diff options
author | Jonathan Abrahams <jonathan@mongodb.com> | 2018-07-12 16:27:51 -0400 |
---|---|---|
committer | Jonathan Abrahams <jonathan@mongodb.com> | 2018-07-12 16:27:51 -0400 |
commit | 999672772454d09172121559140f683675479e1f (patch) | |
tree | abccdcd7ca9bbe16a39ac60b2087ed198aa2d51e /etc/evergreen.yml | |
parent | 4b664ef89ee6e5d7a2c8f86da2e80dc60d5f38b8 (diff) | |
download | mongo-999672772454d09172121559140f683675479e1f.tar.gz |
SERVER-35724 Remote EC2 hosts which are not accessible via ssh should fail with system error
Diffstat (limited to 'etc/evergreen.yml')
-rw-r--r-- | etc/evergreen.yml | 73 |
1 files changed, 58 insertions, 15 deletions
diff --git a/etc/evergreen.yml b/etc/evergreen.yml index eb66fae2f57..2dc7d747c9b 100644 --- a/etc/evergreen.yml +++ b/etc/evergreen.yml @@ -157,7 +157,7 @@ variables: # TODO: Remove psutil from virtualenv_requirements when BUILD-3083 is done - &powercycle_ec2_instance aws_ec2_yml: aws_ec2.yml - expire_hours: "3" + ec2_expire_hours: "24" ec2_monitor_files: proc.json system.json monitor_proc_file: proc.json monitor_system_file: system.json @@ -187,10 +187,12 @@ variables: value: /data/db - key: log_path value: ${remote_dir}/mongod.log + - key: exit_file + value: powercycle_exit.yml - &powercycle_test ec2_artifacts: ${log_path} ${db_path} ${backup_path_after} ${backup_path_before} - program_options: --logLevel=info --backupPathBefore=${backup_path_before} --backupPathAfter=${backup_path_after} + program_options: --exitYamlFile=${exit_file} --logLevel=info --backupPathBefore=${backup_path_before} --backupPathAfter=${backup_path_after} connection_options: --sshUserHost=${private_ip_address} --sshConnection=\"${ssh_identity} ${ssh_connection_options}\" test_options: --testLoops=15 --seedDocNum=10000 --rsync --rsyncExcludeFiles=diagnostic.data/metrics.interim* --validate=local --canary=local crash_options: --crashMethod=internal --crashOption=${windows_crash_cmd} --crashWaitTime=45 --jitterForCrashWaitTime=5 --instanceId=${instance_id} @@ -1021,6 +1023,8 @@ functions: done fi + # Set an exit trap so we can save the real exit status (see SERVER-34033). + trap 'echo $? > error_exit.txt; exit 0' EXIT config_file=powertest.yml eval $python pytests/powertest.py \ "--saveConfigOptions=$config_file \ @@ -1039,10 +1043,22 @@ functions: ${mongod_extra_options}" set +o errexit $python -u pytests/powertest.py --configFile=$config_file - # SERVER-34033: Figure out why shell.exec exits at this point. - test_status=$? 
- echo "Completed pytests/powertest.py, status: $test_status" - exit $test_status + + - command: expansions.update + params: + ignore_missing_file: true + file: src/${exit_file} + + - command: shell.exec + params: + working_dir: src + shell: bash + script: | + # Trigger a system failure if powertest.py failed due to ssh access. + if [ -n "${ec2_ssh_failure}" ]; then + echo "ec2_ssh_failure detected - $(cat ${exit_file})" + exit ${exit_code} + fi - command: shell.exec params: @@ -1064,6 +1080,25 @@ functions: ignore_missing_file: true file: src/ec2_artifacts.yml + - command: shell.exec + type: test + params: + shell: bash + script: | + # Test exits from here with specified exit_code. + if [ -n "${exit_code}" ]; then + # Python program saved exit_code + exit_code=${exit_code} + elif [ -f error_exit.txt ]; then + # Bash trap exit_code + exit_code=$(cat error_exit.txt) + else + exit_code=0 + fi + echo "Exiting powercycle with code $exit_code" + exit $exit_code + + "do multiversion setup" : command: shell.exec params: @@ -1279,7 +1314,7 @@ functions: sleep 30 done - "set up EC2 instance": + "set up EC2 instance": &set_up_ec2_instance - command: shell.exec params: working_dir: src @@ -1305,8 +1340,12 @@ functions: done fi - if [ ! -z "${expire_hours}" ]; then - expire_hours="-e ${expire_hours}" + if [ -n "${ec2_expire_hours}" ]; then + expire_hours="-e ${ec2_expire_hours}" + # Since Windows hosts are expensive to keep running we'll expire it after 3 hours. + if [ "Windows_NT" = "$OS" ]; then + expire_hours="-e 3" + fi fi # Clone another instance of this host in EC2. @@ -1629,7 +1668,7 @@ functions: script: | ${activate_virtualenv} # Tar/zip artifacts on remote host. - if [ -z "${ec2_artifacts}" ]; then + if [[ -z "${ec2_artifacts}" || -n "${ec2_ssh_failure}" ]]; then exit 0 fi cmd="${tar|tar} czf ec2_artifacts.tgz ${ec2_artifacts}" @@ -1649,7 +1688,7 @@ functions: script: | ${activate_virtualenv} # Copy remote artifacts. 
- if [ -z "${ec2_artifacts}" ]; then + if [[ -z "${ec2_artifacts}" || -n "${ec2_ssh_failure}" ]]; then exit 0 fi ssh_connection_options="${ssh_identity} ${ssh_connection_options}" @@ -1664,9 +1703,11 @@ functions: "cleanup EC2 instance": &cleanup_ec2_instance command: shell.exec params: + shell: bash working_dir: src script: | - if [ -z ${instance_id} ]; then + # We do not terminate the EC2 instance if there was an ec2_ssh_failure. + if [[ -z "${ec2_artifacts}" || -n "${ec2_ssh_failure}" ]]; then exit 0 fi ${activate_virtualenv} @@ -1677,9 +1718,10 @@ functions: "gather remote mongo coredumps": &gather_remote_mongo_coredumps command: shell.exec params: + shell: bash working_dir: "src" script: | - if [ ! -f ${aws_ec2_yml|""} ]; then + if [[ ! -f ${aws_ec2_yml|""} || -n "${ec2_ssh_failure}" ]]; then exit 0 fi ssh_connection_options="${ssh_identity} ${ssh_connection_options}" @@ -1704,9 +1746,10 @@ functions: "copy remote mongo coredumps": &copy_remote_mongo_coredumps command: shell.exec params: + shell: bash working_dir: "src" script: | - if [ ! -f ${aws_ec2_yml|""} ]; then + if [[ ! -f ${aws_ec2_yml|""} || -n "${ec2_ssh_failure}" ]]; then exit 0 fi ssh_connection_options="${ssh_identity} ${ssh_connection_options}" @@ -1867,7 +1910,7 @@ pre: # For ssh disable the options GSSAPIAuthentication, CheckHostIP, StrictHostKeyChecking # & UserKnownHostsFile, since these are local connections from one AWS instance to another. - key: ssh_connection_options - value: -o GSSAPIAuthentication=no -o CheckHostIP=no -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=20 -o ConnectionAttempts=20 + value: -o GSSAPIAuthentication=no -o CheckHostIP=no -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10 -o ConnectionAttempts=20 - key: ssh_retries value: "10" - key: set_sudo |