summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jonathan Abrahams <jonathan@mongodb.com> 2016-08-09 23:56:37 -0400
committer: Jonathan Abrahams <jonathan@mongodb.com> 2016-08-10 00:11:28 -0400
commit: 92ee341f57636f644799d9ee44a1d2967c6a6fb4 (patch)
tree: dc7025a7b1cc6debe6db0994a0d7d71ce2339255
parent: 31035ceaaef56a85e552913dd065b8a130424b0f (diff)
download: mongo-92ee341f57636f644799d9ee44a1d2967c6a6fb4.tar.gz
SERVER-25321 Archive mongod logs for Jepsen tasks, even on failure.
Reorganize post processing step
-rw-r--r-- etc/evergreen.yml 222
1 files changed, 127 insertions, 95 deletions
diff --git a/etc/evergreen.yml b/etc/evergreen.yml
index 4d0539187dc..1c74905f743 100644
--- a/etc/evergreen.yml
+++ b/etc/evergreen.yml
@@ -457,8 +457,6 @@ functions:
start_time=$(date +%s)
lein run --tarball "file:///root/mongo-binaries.tgz" ${jepsen_key_time_limit} ${jepsen_protocol_version} ${jepsen_read_concern} ${jepsen_read_with_find_and_modify} ${jepsen_storage_engine} ${jepsen_time_limit} ${jepsen_write_concern}
exit_code=$?
- # Create exit_code file, so we can signal an error, after the archive/put steps have run.
- echo $exit_code > ../../jepsen-exit_code-${task_id}-${execution}.txt
end_time=$(date +%s)
elapsed_secs=$((end_time-start_time))
if [ $exit_code -eq 0 ]; then
@@ -470,51 +468,7 @@ functions:
fi
# Create report.json
echo "{\"failures\": $failures, \"results\": [{\"status\": $status, \"exit_code\": $exit_code, \"test_file\": \"${task_name}\", \"start\": $start_time, \"end\": $end_time, \"elapsed\": $elapsed_secs}]}" > ../../report.json
- - command: shell.exec
- params:
- working_dir: src/jepsen/mongodb
- script: |
- # Copy mongod.log from each LXC host for archive purposes
- numVms=5
- for i in $(seq 1 $numVms)
- do
- hostn=n$i
- if [ ! -d store/latest ]; then
- echo "Creating the store/latest directory, which was not created by Jepsen"
- mkdir -p store/latest
- fi
- mongod_log=store/latest/mongod-$hostn.log
- scp root@$hostn:mongod.log $mongod_log
- if [ -f $mongod_log ]; then
- echo "Copied mongod.log from $hostn to $mongod_log"
- else
- echo "Failed to copy mongod.log from $hostn to $mongod_log"
- fi
- done
- # Always exit without an error so the archive steps will run.
- exit 0
- - command: archive.targz_pack
- params:
- target: "src/jepsen-results.tgz"
- source_dir: "src/jepsen/mongodb/store"
- include:
- - "./**"
- - command: s3.put
- params:
- aws_key: ${aws_key}
- aws_secret: ${aws_secret}
- local_file: src/jepsen-results.tgz
- remote_file: mongodb-mongo-master/${build_variant}/${revision}/jepsen/jepsen-results-${task_id}-${execution}.tgz
- bucket: mciuploads
- permissions: public-read
- content_type: ${content_type|application/x-gzip}
- display_name: Jepsen Test Results - ${execution}
- - command: shell.exec
- type: test
- params:
- working_dir: src
- script: |
- exit $(cat jepsen-exit_code-${task_id}-${execution}.txt)
+ exit $exit_code
"run jstestfuzz":
- command: shell.exec
@@ -697,36 +651,127 @@ post:
params:
file_location: src/report.json
- func: "kill processes"
- - command: archive.targz_pack
+
+ # Print out any Out of Memory killed process messages.
+ - command: shell.exec
params:
- target: "mongo-coredumps.tgz"
- source_dir: "./"
- include:
- - "./**.mdmp"
- # Using shell and tar to recurse properly to all possible diagnostic.data subdirectories. The
- # archive.targz_pack command is not being used here because the command's glob support did not
- # allow us to gather all directories.
+ system_log: true
+ working_dir: src # Temporary files created in src will be cleaned up in "pre".
+ script: |
+ ${set_sudo}
+ # Use dmesg -T option, if supported, to display timestamps.
+ dmesg=dmesg
+ dmesg -T > /dev/null 2>&1
+ if [ $? -eq 0 ]; then
+ dmesg="dmesg -T"
+ fi
+ $sudo $dmesg 2> /dev/null > dmesg.txt
+ if [ $? -ne 0 ]; then
+ echo "Cannot check for OOM (Out of memory) killed processes on this platform"
+ exit 0
+ fi
+ egrep -i '(Out of memory|OOM[- ]killer|Killed process)' dmesg.txt > oom.txt
+ if [ -s oom.txt ]; then
+ echo "OOM (Out of memory) killed processes detected"
+ cat oom.txt
+ else
+ echo "No OOM (Out of memory) killed processes detected"
+ fi
+
+ # Gather and archive FTDC data.
- command: shell.exec
params:
working_dir: src
script: |
+ # Using shell and tar to recurse properly to all possible diagnostic.data subdirectories.
+ # The archive.targz_pack command is not being used here because the command's glob support
+ # did not allow us to gather all directories.
if [ -d /data/db ]; then
file_list=$(cd /data/db && find . -type d -name diagnostic.data)
if [ -n "$file_list" ]; then
${tar|tar} cvzf diagnostic-data.tgz -C /data/db $file_list
fi
fi
+ - command: s3.put
+ params:
+ aws_key: ${aws_key}
+ aws_secret: ${aws_secret}
+ local_file: src/diagnostic-data.tgz
+ remote_file: mongodb-mongo-master/${build_variant}/${revision}/ftdc/mongo-diagnostic-data-${task_id}-${execution}.tgz
+ bucket: mciuploads
+ permissions: public-read
+ content_type: ${content_type|application/x-gzip}
+ display_name: FTDC Diagnostic Data - Execution ${execution}
+ optional: true
+
+ # Gather and archive the Jepsen mongod logs.
- command: shell.exec
params:
working_dir: src
script: |
- # removes files from the (local) scons cache when it's over a
- # threshold, to the $prune_ratio percentage. Ideally override
- # these default values in the distro config in evergreen.
-
- if [ -d "${scons_cache_path}" ]; then
- ${python|python} buildscripts/scons_cache_prune.py --cache-dir '${scons_cache_path}' --cache-size ${scons_cache_size|200} --prune-ratio ${scons_prune_ratio|0.8}
+ # For Jepsen tasks, archive the mongod log files.
+ if [ -d jepsen ]; then
+ # Copy mongod.log from each LXC host for archive purposes. The log can reside in
+ # 1 of 2 places:
+ # If the task completed: /root
+ # If the task failed to complete: /opt/mongodb
+ # There may be a mongod.log in both places, but the one in /opt/mongodb should override
+ # the one in /root, since the log in /root could be from a previous successful run.
+ numVms=5
+ for i in $(seq 1 $numVms)
+ do
+ hostn=n$i
+ mongod_log=mongod-$hostn.log
+ for mongod_log_remote in /root/mongod.log /opt/mongodb/mongod.log
+ do
+ scp -q root@$hostn:$mongod_log_remote $mongod_log 2> /dev/null
+ if [ -f $mongod_log ]; then
+ echo "Copied $mongod_log_remote from $hostn to $mongod_log"
+ fi
+ done
+ if [ ! -f $mongod_log ]; then
+ echo "No mongod.log from $hostn copied to $mongod_log"
+ fi
+ done
fi
+ - command: archive.targz_pack
+ params:
+ target: "src/jepsen-mongod-logs.tgz"
+ source_dir: "src"
+ include:
+ - "mongod*.log"
+ - command: s3.put
+ params:
+ aws_key: ${aws_key}
+ aws_secret: ${aws_secret}
+ local_file: src/jepsen-mongod-logs.tgz
+ remote_file: mongodb-mongo-master/${build_variant}/${revision}/jepsen/jepsen-mongod-logs-${task_id}-${execution}.tgz
+ bucket: mciuploads
+ permissions: public-read
+ content_type: ${content_type|application/x-gzip}
+ display_name: Jepsen mongod Logs - ${execution}
+ optional: true
+
+ # Gather and archive the Jepsen results.
+ - command: archive.targz_pack
+ params:
+ target: "src/jepsen-results.tgz"
+ source_dir: "src/jepsen/mongodb/store"
+ include:
+ - "./**"
+ - command: s3.put
+ params:
+ aws_key: ${aws_key}
+ aws_secret: ${aws_secret}
+ local_file: src/jepsen-results.tgz
+ remote_file: mongodb-mongo-master/${build_variant}/${revision}/jepsen/jepsen-results-${task_id}-${execution}.tgz
+ bucket: mciuploads
+ permissions: public-read
+ content_type: ${content_type|application/x-gzip}
+ display_name: Jepsen Test Results - ${execution}
+ optional: true
+
+ # Gather and archive mongo coredumps.
- command: shell.exec
params:
working_dir: src
@@ -737,11 +782,10 @@ post:
fi
- command: archive.targz_pack
params:
- target: "diskstats.tgz"
+ target: "mongo-coredumps.tgz"
source_dir: "./"
include:
- - "./mongo-diskstats*"
- - "./mongo-diskstats*.csv"
+ - "./**.mdmp"
- command: s3.put
params:
aws_key: ${aws_key}
@@ -753,16 +797,15 @@ post:
content_type: ${content_type|application/x-gzip}
display_name: Core Dumps - Execution ${execution}
optional: true
- - command: s3.put
+
+ # Gather and archive disk statistics.
+ - command: archive.targz_pack
params:
- aws_key: ${aws_key}
- aws_secret: ${aws_secret}
- local_file: src/diagnostic-data.tgz
- remote_file: mongodb-mongo-master/${build_variant}/${revision}/ftdc/mongo-diagnostic-data-${task_id}-${execution}.tgz
- bucket: mciuploads
- permissions: public-read
- content_type: ${content_type|application/x-gzip}
- display_name: FTDC Diagnostic Data - Execution ${execution}
+ target: "diskstats.tgz"
+ source_dir: "./"
+ include:
+ - "./mongo-diskstats*"
+ - "./mongo-diskstats*.csv"
- command: s3.put
params:
aws_key: ${aws_key}
@@ -774,34 +817,23 @@ post:
content_type: ${content_type|application/x-gzip}
display_name: Disk Stats - Execution ${execution}
optional: true
+
+ # Cleanup steps.
- command: shell.exec
params:
+ working_dir: src
script: |
- rm -rf ~/.aws
+ # removes files from the (local) scons cache when it's over a
+ # threshold, to the $prune_ratio percentage. Ideally override
+ # these default values in the distro config in evergreen.
+
+ if [ -d "${scons_cache_path}" ]; then
+ ${python|python} buildscripts/scons_cache_prune.py --cache-dir '${scons_cache_path}' --cache-size ${scons_cache_size|200} --prune-ratio ${scons_prune_ratio|0.8}
+ fi
- command: shell.exec
params:
- system_log: true
- working_dir: src # Temporary files created in src will be cleaned up in "pre".
script: |
- ${set_sudo}
- # Use dmesg -T option, if supported, to display timestamps.
- dmesg=dmesg
- dmesg -T > /dev/null 2>&1
- if [ $? -eq 0 ]; then
- dmesg="dmesg -T"
- fi
- $sudo $dmesg 2> /dev/null > dmesg.txt
- if [ $? -ne 0 ]; then
- echo "Cannot check for OOM (Out of memory) killed processes on this platform"
- exit 0
- fi
- egrep -i '(Out of memory|OOM[- ]killer|Killed process)' dmesg.txt > oom.txt
- if [ -s oom.txt ]; then
- echo "OOM (Out of memory) killed processes detected"
- cat oom.txt
- else
- echo "No OOM (Out of memory) killed processes detected"
- fi
+ rm -rf ~/.aws
- command: shell.cleanup
timeout: