summary | refs | log | tree | commit | diff
path: root/etc/evergreen.yml
diff options
context:
space:
mode:
author Jonathan Abrahams <jonathan@mongodb.com> 2018-07-12 16:27:51 -0400
committer Jonathan Abrahams <jonathan@mongodb.com> 2018-07-12 16:27:51 -0400
commit 999672772454d09172121559140f683675479e1f (patch)
tree abccdcd7ca9bbe16a39ac60b2087ed198aa2d51e /etc/evergreen.yml
parent 4b664ef89ee6e5d7a2c8f86da2e80dc60d5f38b8 (diff)
download mongo-999672772454d09172121559140f683675479e1f.tar.gz
SERVER-35724 Remote EC2 hosts which are not accessible via ssh should fail with system error
Diffstat (limited to 'etc/evergreen.yml')
-rw-r--r-- etc/evergreen.yml 73
1 files changed, 58 insertions, 15 deletions
diff --git a/etc/evergreen.yml b/etc/evergreen.yml
index eb66fae2f57..2dc7d747c9b 100644
--- a/etc/evergreen.yml
+++ b/etc/evergreen.yml
@@ -157,7 +157,7 @@ variables:
# TODO: Remove psutil from virtualenv_requirements when BUILD-3083 is done
- &powercycle_ec2_instance
aws_ec2_yml: aws_ec2.yml
- expire_hours: "3"
+ ec2_expire_hours: "24"
ec2_monitor_files: proc.json system.json
monitor_proc_file: proc.json
monitor_system_file: system.json
@@ -187,10 +187,12 @@ variables:
value: /data/db
- key: log_path
value: ${remote_dir}/mongod.log
+ - key: exit_file
+ value: powercycle_exit.yml
- &powercycle_test
ec2_artifacts: ${log_path} ${db_path} ${backup_path_after} ${backup_path_before}
- program_options: --logLevel=info --backupPathBefore=${backup_path_before} --backupPathAfter=${backup_path_after}
+ program_options: --exitYamlFile=${exit_file} --logLevel=info --backupPathBefore=${backup_path_before} --backupPathAfter=${backup_path_after}
connection_options: --sshUserHost=${private_ip_address} --sshConnection=\"${ssh_identity} ${ssh_connection_options}\"
test_options: --testLoops=15 --seedDocNum=10000 --rsync --rsyncExcludeFiles=diagnostic.data/metrics.interim* --validate=local --canary=local
crash_options: --crashMethod=internal --crashOption=${windows_crash_cmd} --crashWaitTime=45 --jitterForCrashWaitTime=5 --instanceId=${instance_id}
@@ -1021,6 +1023,8 @@ functions:
done
fi
+ # Set an exit trap so we can save the real exit status (see SERVER-34033).
+ trap 'echo $? > error_exit.txt; exit 0' EXIT
config_file=powertest.yml
eval $python pytests/powertest.py \
"--saveConfigOptions=$config_file \
@@ -1039,10 +1043,22 @@ functions:
${mongod_extra_options}"
set +o errexit
$python -u pytests/powertest.py --configFile=$config_file
- # SERVER-34033: Figure out why shell.exec exits at this point.
- test_status=$?
- echo "Completed pytests/powertest.py, status: $test_status"
- exit $test_status
+
+ - command: expansions.update
+ params:
+ ignore_missing_file: true
+ file: src/${exit_file}
+
+ - command: shell.exec
+ params:
+ working_dir: src
+ shell: bash
+ script: |
+ # Trigger a system failure if powertest.py failed due to ssh access.
+ if [ -n "${ec2_ssh_failure}" ]; then
+ echo "ec2_ssh_failure detected - $(cat ${exit_file})"
+ exit ${exit_code}
+ fi
- command: shell.exec
params:
@@ -1064,6 +1080,25 @@ functions:
ignore_missing_file: true
file: src/ec2_artifacts.yml
+ - command: shell.exec
+ type: test
+ params:
+ shell: bash
+ script: |
+ # Test exits from here with specified exit_code.
+ if [ -n "${exit_code}" ]; then
+ # Python program saved exit_code
+ exit_code=${exit_code}
+ elif [ -f error_exit.txt ]; then
+ # Bash trap exit_code
+ exit_code=$(cat error_exit.txt)
+ else
+ exit_code=0
+ fi
+ echo "Exiting powercycle with code $exit_code"
+ exit $exit_code
+
+
"do multiversion setup" :
command: shell.exec
params:
@@ -1279,7 +1314,7 @@ functions:
sleep 30
done
- "set up EC2 instance":
+ "set up EC2 instance": &set_up_ec2_instance
- command: shell.exec
params:
working_dir: src
@@ -1305,8 +1340,12 @@ functions:
done
fi
- if [ ! -z "${expire_hours}" ]; then
- expire_hours="-e ${expire_hours}"
+ if [ -n "${ec2_expire_hours}" ]; then
+ expire_hours="-e ${ec2_expire_hours}"
+ # Since Windows hosts are expensive to keep running we'll expire it after 3 hours.
+ if [ "Windows_NT" = "$OS" ]; then
+ expire_hours="-e 3"
+ fi
fi
# Clone another instance of this host in EC2.
@@ -1629,7 +1668,7 @@ functions:
script: |
${activate_virtualenv}
# Tar/zip artifacts on remote host.
- if [ -z "${ec2_artifacts}" ]; then
+ if [[ -z "${ec2_artifacts}" || -n "${ec2_ssh_failure}" ]]; then
exit 0
fi
cmd="${tar|tar} czf ec2_artifacts.tgz ${ec2_artifacts}"
@@ -1649,7 +1688,7 @@ functions:
script: |
${activate_virtualenv}
# Copy remote artifacts.
- if [ -z "${ec2_artifacts}" ]; then
+ if [[ -z "${ec2_artifacts}" || -n "${ec2_ssh_failure}" ]]; then
exit 0
fi
ssh_connection_options="${ssh_identity} ${ssh_connection_options}"
@@ -1664,9 +1703,11 @@ functions:
"cleanup EC2 instance": &cleanup_ec2_instance
command: shell.exec
params:
+ shell: bash
working_dir: src
script: |
- if [ -z ${instance_id} ]; then
+ # We do not terminate the EC2 instance if there was an ec2_ssh_failure.
+ if [[ -z "${ec2_artifacts}" || -n "${ec2_ssh_failure}" ]]; then
exit 0
fi
${activate_virtualenv}
@@ -1677,9 +1718,10 @@ functions:
"gather remote mongo coredumps": &gather_remote_mongo_coredumps
command: shell.exec
params:
+ shell: bash
working_dir: "src"
script: |
- if [ ! -f ${aws_ec2_yml|""} ]; then
+ if [[ ! -f ${aws_ec2_yml|""} || -n "${ec2_ssh_failure}" ]]; then
exit 0
fi
ssh_connection_options="${ssh_identity} ${ssh_connection_options}"
@@ -1704,9 +1746,10 @@ functions:
"copy remote mongo coredumps": &copy_remote_mongo_coredumps
command: shell.exec
params:
+ shell: bash
working_dir: "src"
script: |
- if [ ! -f ${aws_ec2_yml|""} ]; then
+ if [[ ! -f ${aws_ec2_yml|""} || -n "${ec2_ssh_failure}" ]]; then
exit 0
fi
ssh_connection_options="${ssh_identity} ${ssh_connection_options}"
@@ -1867,7 +1910,7 @@ pre:
# For ssh disable the options GSSAPIAuthentication, CheckHostIP, StrictHostKeyChecking
# & UserKnownHostsFile, since these are local connections from one AWS instance to another.
- key: ssh_connection_options
- value: -o GSSAPIAuthentication=no -o CheckHostIP=no -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=20 -o ConnectionAttempts=20
+ value: -o GSSAPIAuthentication=no -o CheckHostIP=no -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10 -o ConnectionAttempts=20
- key: ssh_retries
value: "10"
- key: set_sudo