diff options
-rw-r--r-- | etc/evergreen.yml | 222 |
1 files changed, 127 insertions, 95 deletions
diff --git a/etc/evergreen.yml b/etc/evergreen.yml
index 4d0539187dc..1c74905f743 100644
--- a/etc/evergreen.yml
+++ b/etc/evergreen.yml
@@ -457,8 +457,6 @@ functions:
           start_time=$(date +%s)
           lein run --tarball "file:///root/mongo-binaries.tgz" ${jepsen_key_time_limit} ${jepsen_protocol_version} ${jepsen_read_concern} ${jepsen_read_with_find_and_modify} ${jepsen_storage_engine} ${jepsen_time_limit} ${jepsen_write_concern}
           exit_code=$?
-          # Create exit_code file, so we can signal an error, after the archive/put steps have run.
-          echo $exit_code > ../../jepsen-exit_code-${task_id}-${execution}.txt
           end_time=$(date +%s)
           elapsed_secs=$((end_time-start_time))
           if [ $exit_code -eq 0 ]; then
@@ -470,51 +468,7 @@ functions:
           fi
           # Create report.json
           echo "{\"failures\": $failures, \"results\": [{\"status\": $status, \"exit_code\": $exit_code, \"test_file\": \"${task_name}\", \"start\": $start_time, \"end\": $end_time, \"elapsed\": $elapsed_secs}]}" > ../../report.json
-    - command: shell.exec
-      params:
-        working_dir: src/jepsen/mongodb
-        script: |
-          # Copy mongod.log from each LXC host for archive purposes
-          numVms=5
-          for i in $(seq 1 $numVms)
-          do
-            hostn=n$i
-            if [ ! -d store/latest ]; then
-              echo "Creating the store/latest directory, which was not created by Jepsen"
-              mkdir -p store/latest
-            fi
-            mongod_log=store/latest/mongod-$hostn.log
-            scp root@$hostn:mongod.log $mongod_log
-            if [ -f $mongod_log ]; then
-              echo "Copied mongod.log from $hostn to $mongod_log"
-            else
-              echo "Failed to copy mongod.log from $hostn to $mongod_log"
-            fi
-          done
-          # Always exit without an error so the archive steps will run.
-          exit 0
-    - command: archive.targz_pack
-      params:
-        target: "src/jepsen-results.tgz"
-        source_dir: "src/jepsen/mongodb/store"
-        include:
-          - "./**"
-    - command: s3.put
-      params:
-        aws_key: ${aws_key}
-        aws_secret: ${aws_secret}
-        local_file: src/jepsen-results.tgz
-        remote_file: mongodb-mongo-master/${build_variant}/${revision}/jepsen/jepsen-results-${task_id}-${execution}.tgz
-        bucket: mciuploads
-        permissions: public-read
-        content_type: ${content_type|application/x-gzip}
-        display_name: Jepsen Test Results - ${execution}
-    - command: shell.exec
-      type: test
-      params:
-        working_dir: src
-        script: |
-          exit $(cat jepsen-exit_code-${task_id}-${execution}.txt)
+          exit $exit_code
 
   "run jstestfuzz":
     - command: shell.exec
@@ -697,36 +651,127 @@ post:
     params:
       file_location: src/report.json
   - func: "kill processes"
-  - command: archive.targz_pack
+
+  # Print out any Out of Memory killed process messages.
+  - command: shell.exec
     params:
-      target: "mongo-coredumps.tgz"
-      source_dir: "./"
-      include:
-        - "./**.mdmp"
-  # Using shell and tar to recurse properly to all possible diagnostic.data subdirectories. The
-  # archive.targz_pack command is not being used here because the command's glob support did not
-  # allow us to gather all directories.
+      system_log: true
+      working_dir: src # Temporary files created in src will be cleaned up in "pre".
+      script: |
+        ${set_sudo}
+        # Use dmesg -T option, if supported, to display timestamps.
+        dmesg=dmesg
+        dmesg -T > /dev/null 2>&1
+        if [ $? -eq 0 ]; then
+          dmesg="dmesg -T"
+        fi
+        $sudo $dmesg 2> /dev/null > dmesg.txt
+        if [ $? -ne 0 ]; then
+          echo "Cannot check for OOM (Out of memory) killed processes on this platform"
+          exit 0
+        fi
+        egrep -i '(Out of memory|OOM[- ]killer|Killed process)' dmesg.txt > oom.txt
+        if [ -s oom.txt ]; then
+          echo "OOM (Out of memory) killed processes detected"
+          cat oom.txt
+        else
+          echo "No OOM (Out of memory) killed processes detected"
+        fi
+
+  # Gather and archive FTDC data.
   - command: shell.exec
     params:
       working_dir: src
       script: |
+        # Using shell and tar to recurse properly to all possible diagnostic.data subdirectories.
+        # The archive.targz_pack command is not being used here because the command's glob support
+        # did not allow us to gather all directories.
         if [ -d /data/db ]; then
           file_list=$(cd /data/db && find . -type d -name diagnostic.data)
           if [ -n "$file_list" ]; then
             ${tar|tar} cvzf diagnostic-data.tgz -C /data/db $file_list
           fi
         fi
+  - command: s3.put
+    params:
+      aws_key: ${aws_key}
+      aws_secret: ${aws_secret}
+      local_file: src/diagnostic-data.tgz
+      remote_file: mongodb-mongo-master/${build_variant}/${revision}/ftdc/mongo-diagnostic-data-${task_id}-${execution}.tgz
+      bucket: mciuploads
+      permissions: public-read
+      content_type: ${content_type|application/x-gzip}
+      display_name: FTDC Diagnostic Data - Execution ${execution}
+      optional: true
+
+  # Gather and archive the Jepsen mongod logs.
   - command: shell.exec
     params:
       working_dir: src
       script: |
-        # removes files from the (local) scons cache when it's over a
-        # threshold, to the $prune_ratio percentage. Ideally override
-        # these default values in the distro config in evergreen.
-
-        if [ -d "${scons_cache_path}" ]; then
-          ${python|python} buildscripts/scons_cache_prune.py --cache-dir '${scons_cache_path}' --cache-size ${scons_cache_size|200} --prune-ratio ${scons_prune_ratio|0.8}
+        # For Jepsen tasks, archive the mongod log files.
+        if [ -d jepsen ]; then
+          # Copy mongod.log from each LXC host for archive purposes. The log can reside in
+          # 1 of 2 places:
+          # If the task completed: /root
+          # If the task failed to complete: /opt/mongodb
+          # There may be a mongod.log in both places, but the one in /opt/mongodb should override
+          # the one in /root, since the log in /root could be from a previous successful run.
+          numVms=5
+          for i in $(seq 1 $numVms)
+          do
+            hostn=n$i
+            mongod_log=mongod-$hostn.log
+            for mongod_log_remote in /root/mongod.log /opt/mongodb/mongod.log
+            do
+              scp -q root@$hostn:$mongod_log_remote $mongod_log 2> /dev/null
+              if [ -f $mongod_log ]; then
+                echo "Copied $mongod_log_remote from $hostn to $mongod_log"
+              fi
+            done
+            if [ ! -f $mongod_log ]; then
+              echo "No mongod.log from $hostn copied to $mongod_log"
+            fi
+          done
         fi
+  - command: archive.targz_pack
+    params:
+      target: "src/jepsen-mongod-logs.tgz"
+      source_dir: "src"
+      include:
+        - "mongod*.log"
+  - command: s3.put
+    params:
+      aws_key: ${aws_key}
+      aws_secret: ${aws_secret}
+      local_file: src/jepsen-mongod-logs.tgz
+      remote_file: mongodb-mongo-master/${build_variant}/${revision}/jepsen/jepsen-mongod-logs-${task_id}-${execution}.tgz
+      bucket: mciuploads
+      permissions: public-read
+      content_type: ${content_type|application/x-gzip}
+      display_name: Jepsen mongod Logs - ${execution}
+      optional: true
+
+  # Gather and archive the Jepsen results.
+  - command: archive.targz_pack
+    params:
+      target: "src/jepsen-results.tgz"
+      source_dir: "src/jepsen/mongodb/store"
+      include:
+        - "./**"
+  - command: s3.put
+    params:
+      aws_key: ${aws_key}
+      aws_secret: ${aws_secret}
+      local_file: src/jepsen-results.tgz
+      remote_file: mongodb-mongo-master/${build_variant}/${revision}/jepsen/jepsen-results-${task_id}-${execution}.tgz
+      bucket: mciuploads
+      permissions: public-read
+      content_type: ${content_type|application/x-gzip}
+      display_name: Jepsen Test Results - ${execution}
+      optional: true
+
+  # Gather and archive mongo coredumps.
   - command: shell.exec
     params:
       working_dir: src
@@ -737,11 +782,10 @@ post:
         fi
   - command: archive.targz_pack
     params:
-      target: "diskstats.tgz"
+      target: "mongo-coredumps.tgz"
       source_dir: "./"
       include:
-        - "./mongo-diskstats*"
-        - "./mongo-diskstats*.csv"
+        - "./**.mdmp"
   - command: s3.put
     params:
       aws_key: ${aws_key}
@@ -753,16 +797,15 @@ post:
       content_type: ${content_type|application/x-gzip}
       display_name: Core Dumps - Execution ${execution}
       optional: true
-  - command: s3.put
+
+  # Gather and archive disk statistics.
+  - command: archive.targz_pack
     params:
-      aws_key: ${aws_key}
-      aws_secret: ${aws_secret}
-      local_file: src/diagnostic-data.tgz
-      remote_file: mongodb-mongo-master/${build_variant}/${revision}/ftdc/mongo-diagnostic-data-${task_id}-${execution}.tgz
-      bucket: mciuploads
-      permissions: public-read
-      content_type: ${content_type|application/x-gzip}
-      display_name: FTDC Diagnostic Data - Execution ${execution}
+      target: "diskstats.tgz"
+      source_dir: "./"
+      include:
+        - "./mongo-diskstats*"
+        - "./mongo-diskstats*.csv"
   - command: s3.put
     params:
       aws_key: ${aws_key}
@@ -774,34 +817,23 @@ post:
       content_type: ${content_type|application/x-gzip}
       display_name: Disk Stats - Execution ${execution}
       optional: true
+
+  # Cleanup steps.
   - command: shell.exec
     params:
+      working_dir: src
       script: |
-        rm -rf ~/.aws
+        # removes files from the (local) scons cache when it's over a
+        # threshold, to the $prune_ratio percentage. Ideally override
+        # these default values in the distro config in evergreen.
+
+        if [ -d "${scons_cache_path}" ]; then
+          ${python|python} buildscripts/scons_cache_prune.py --cache-dir '${scons_cache_path}' --cache-size ${scons_cache_size|200} --prune-ratio ${scons_prune_ratio|0.8}
+        fi
   - command: shell.exec
     params:
-      system_log: true
-      working_dir: src # Temporary files created in src will be cleaned up in "pre".
       script: |
-        ${set_sudo}
-        # Use dmesg -T option, if supported, to display timestamps.
-        dmesg=dmesg
-        dmesg -T > /dev/null 2>&1
-        if [ $? -eq 0 ]; then
-          dmesg="dmesg -T"
-        fi
-        $sudo $dmesg 2> /dev/null > dmesg.txt
-        if [ $? -ne 0 ]; then
-          echo "Cannot check for OOM (Out of memory) killed processes on this platform"
-          exit 0
-        fi
-        egrep -i '(Out of memory|OOM[- ]killer|Killed process)' dmesg.txt > oom.txt
-        if [ -s oom.txt ]; then
-          echo "OOM (Out of memory) killed processes detected"
-          cat oom.txt
-        else
-          echo "No OOM (Out of memory) killed processes detected"
-        fi
+        rm -rf ~/.aws
   - command: shell.cleanup
 
 timeout: