buildscripts/setup_spawnhost_coredump


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330

#!/bin/bash

cd $HOME # workaround EVG-12829

unameOut=$(uname -s)
case "${unameOut}" in
    Linux*)     machine=Linux;;
    Darwin*)    machine=Mac;;
    CYGWIN*)    machine=Cygwin;;
    *)          machine="UNKNOWN:${unameOut}"
esac

TOOLCHAIN_ROOT=/opt/mongodbtoolchain/v4

if [[ "${machine}" = "Cygwin" ]]; then
    out_dir="/cygdrive/c/setup_script_output.txt"
    desktop_dir="/cygdrive/c/Users/Administrator/Desktop"

    {
        date
        env

        echo "----------------------"
        echo -e "\n=> Setting _NT_SOURCE_PATH environment variable for debuggers to pick up source files."
        SRC_DIR_HASH=$(readlink -f /cygdrive/z/data/mci/source-*)
        SRC_DIR="${SRC_DIR_HASH}/src"
        echo "Source Path: [${SRC_DIR}]"
        set -x;
        setx _NT_SOURCE_PATH "${SRC_DIR}"
        { set +x; } 2>/dev/null

        echo -e "\n=> Setting _NT_SYMBOL_PATH environment variable for debuggers to pick up the symbols."
        DBG_ARCHIVE_PARENT=$(readlink -f /cygdrive/z/data/mci/artifacts-*dist_test_debug)
        DBG_ARCHIVE=$(readlink -f ${DBG_ARCHIVE_PARENT}/debugsymbols-*.zip)
        DBG_ARCHIVE_TARGET_PARENT="${DBG_ARCHIVE_PARENT}/extracted_symbols"
        DBG_ARCHIVE_TARGET="${DBG_ARCHIVE_TARGET_PARENT}/dist-test/bin"
        echo "Symbols Dir: [${DBG_ARCHIVE_TARGET}]"

        echo -e "\n=> Extracting Symbol files."
        set -x;
        mkdir -p ${DBG_ARCHIVE_TARGET_PARENT}
        unzip -n ${DBG_ARCHIVE} -d ${DBG_ARCHIVE_TARGET_PARENT}
        setx _NT_SYMBOL_PATH "${DBG_ARCHIVE_TARGET};srv*;"
        { set +x; } 2>/dev/null

        echo -e "\n=> Extracting Core Dump to Desktop."
        COREDUMP_ARCHIVE=$(readlink -f /cygdrive/z/data/mci/artifacts-*/mongo-coredumps*.tgz 2>/dev/null)
        COREDUMP_ARCHIVE_PARENT=$(dirname ${COREDUMP_ARCHIVE})
        COREDUMP_ARCHIVE_TARGET="${COREDUMP_ARCHIVE_PARENT}/extracted_dump"
        set -x;
        mkdir -p ${COREDUMP_ARCHIVE_TARGET}
        tar -xzvf ${COREDUMP_ARCHIVE} -C ${COREDUMP_ARCHIVE_TARGET}
        cp -r ${COREDUMP_ARCHIVE_TARGET}/* ${desktop_dir}
        { set +x; } 2>/dev/null
        echo "Copied to Desktop."
    } &> ${out_dir}

    cp ${out_dir} ${desktop_dir}
else
    # Communicate to users that logged in before the script started that nothing is ready.
    wall "The setup_spawnhost_coredump script has just started setting up the debugging environment."

    # Write this file that gets cat'ed on login to communicate to users logging in if this setup script is still running.
    echo '+-----------------------------------------------------------------------------------+' > ~/.setup_spawnhost_coredump_progress
    echo "| The setup script is still setting up data files for inspection on a [${machine}] host. |" >> ~/.setup_spawnhost_coredump_progress
    echo '+-----------------------------------------------------------------------------------+' >> ~/.setup_spawnhost_coredump_progress

    cat >> ~/.profile <<EOF
cat ~/.setup_spawnhost_coredump_progress
# Coredumps generated by a toolchain built mongodb can be problematic when examined with the system
# gdb.
alias gdb='${TOOLCHAIN_ROOT}/bin/gdb'
# As per below, put the user into the appropriate directory. This is where gdb is expected to be
# invoked from.
cd debug
echo "Debuggable binaries:"
ls -l mongo* 2>/dev/null | grep -v debug$
ls -l bin/ 2>/dev/null
( ls -l mongo* &>/dev/null | grep -v debug$ || ls -l bin/ &>/dev/null ) || echo " [none]"

for item in "mongo" "mongod" "mongos"; do
    echo "\${item} core dumps:"
    ls -l dump_\${item}.* 2>/dev/null || echo " [none]"
done

echo "Core dumps from unknown processes (crashed processes typically found here):"
ls -l dump_* 2>/dev/null | grep -v mongo || echo " [none]"

echo
echo "To examine a core dump, type 'gdb ./<binary> ./<core file>'"
cat ~/.setup_spawnhost_coredump_progress
EOF

    echo 'if [ -f ~/.profile ]; then
    . ~/.profile
fi' >> .bash_profile

    # Make a directory on the larger EBS volume. Soft-link it under the home directory. The smaller home
    # volume can have trouble particularly with coredumps from sharded timeouts.
    mkdir -p /data/debug
    ln -s /data/debug .
    cd debug

    # As the name suggests, pretty printers. Primarily for boost::optional<T>
    git clone git@github.com:mongodb-forks/Boost-Pretty-Printer.git --branch mongodb-stable &

    archive_fail() {
        echo "Error: archive [${1}] not found." >&2
    }

    # Discover and unarchive necessary files and source code. This will put mongo binaries and their
    # partner .debug files in the same `debug/bin` directory. The `bin` directory will later be symbolic
    # linked into the top-level (`debug`) directory. Shared library files and their debug symbols will
    # be dumped into a `debug/lib` directory for tidiness. The mongo `<reporoot>/src/` directory is soft
    # linked as `debug/src`. The .gdbinit file assumes gdb is being run from the `debug` directory.
    BIN_ARCHIVE=$(ls /data/mci/artifacts-*archive_dist_test*/mongo-*.tgz 2>/dev/null)
    if [[ -n $BIN_ARCHIVE ]]; then
        # Have shell expand braces before passing the wildcard to tar.
        bin_files_pattern=(\*/bin/mongo{d,s,,bridge})
        tar --wildcards --strip-components=1 -xzf $BIN_ARCHIVE "${bin_files_pattern[@]}"
        tar --wildcards --strip-components=1 -xzf $BIN_ARCHIVE '*/lib/*' &
    else
        archive_fail "bin"
    fi

    DBG_ARCHIVE=$(ls /data/mci/artifacts-*archive_dist_test_debug/debugsymbols-*.tgz 2>/dev/null)
    if [[ -n $DBG_ARCHIVE ]]; then
        # Support discovering split-dwarf files. Specifically, capture both <file>.debug and <file>.dwp
        # files.
        # Have shell expand braces before passing the wildcard to tar.
        dbg_files_pattern=(\*/bin/mongo{d,s,,bridge}.{debug,dwp})
        tar --wildcards --strip-components=1 -xzf $DBG_ARCHIVE "${dbg_files_pattern[@]}" &
        tar --wildcards --strip-components=1 -xzf $DBG_ARCHIVE '*/lib/*' &
    else
        archive_fail "debug"
    fi

    UNITTEST_ARCHIVE=$(ls /data/mci/artifacts-*run_unittests/mongo-unittests-*.tgz 2>/dev/null)
    if [[ -n $UNITTEST_ARCHIVE ]]; then
        tar --wildcards --strip-components=0 -xzf $UNITTEST_ARCHIVE 'bin/*' &
        tar --wildcards -xzf $UNITTEST_ARCHIVE 'lib/*' &
    else
        archive_fail "unittest"
    fi

    BENCHMARK_ARCHIVE=$(ls /data/mci/artifacts-*compile_upload_benchmarks/mongodb_mongo_*.tgz 2>/dev/null)
    if [[ -n $BENCHMARK_ARCHIVE ]]; then
        tar --wildcards --strip-components=2 -xzf $BENCHMARK_ARCHIVE '*/bin/*' &
    else
        archive_fail "benchmark"
    fi

    SRC_DIR=$(ls -d /data/mci/source-* 2>/dev/null)
    if [[ -n $SRC_DIR ]]; then
        ln -s ${SRC_DIR}/.gdbinit .
        ln -s ${SRC_DIR}/src src
        ln -s ${SRC_DIR}/buildscripts buildscripts

        # Install pymongo to get the bson library for pretty-printers.
        ${TOOLCHAIN_ROOT}/bin/pip3 install -r ${SRC_DIR}/etc/pip/dev-requirements.txt &
    else
        archive_fail "src"
    fi

    COREDUMP_ARCHIVE=$(ls /data/mci/artifacts-*/mongo-coredumps-*.tgz 2>/dev/null)
    if [[ -n $COREDUMP_ARCHIVE ]]; then
        tar -xzf $COREDUMP_ARCHIVE &
    else
        archive_fail "coredump"
    fi

    echo "Waiting for background processes to complete."
    wait

    # Symbolic linking all of the executable files is sufficient for `gdb ./mongod ./dump_mongod.core`
    # to succeed. This inadvertantly also links in the ".debug" files which is unnecessary, but
    # harmless. gdb expects the .debug files to live adjacent to the physical binary.
    find bin -type f -perm -o=x -exec ln -s {} . \;

    # This script checks a bin for the dwarf version and then generates an index if the bin does not already have an index. eu-readelf is used in place of readelf as it is much faster.
    cat > add_index.sh <<EOF
#!/bin/bash

set -o pipefail

target_dir="\$(dirname \${1})"

dwarf_version="\$($TOOLCHAIN_ROOT/bin/eu-readelf --debug-dump=info \$1 | grep --line-buffered -E '^\s+Version:' | head -1 | awk -F, '{print(\$1)}' | awk '{print(\$2)}')"

if [[ \$dwarf_version == 5 ]]
then
    $TOOLCHAIN_ROOT/bin/gdb --batch-silent --quiet --nx --eval-command "save gdb-index -dwarf-5 \$target_dir" \$1
    if [ -f \${1}.debug_names ]
    then
        $TOOLCHAIN_ROOT/bin/objcopy --dump-section .debug_str=\${1}.debug_str.new \$1
        cat \${1}.debug_str >>\${1}.debug_str.new
        $TOOLCHAIN_ROOT/bin/objcopy --add-section .debug_names=\${1}.debug_names --set-section-flags .debug_names=readonly --update-section .debug_str=\${1}.debug_str.new \${1} \${1}
        rm -f \${1}.debug_names \${1}.debug_str.new \${1}.debug_str
    fi

elif [[ \$dwarf_version == 4 ]]
then
    $TOOLCHAIN_ROOT/bin/gdb --batch-silent --quiet --nx --eval-command "save gdb-index \$target_dir" \$1
    if [ -f \${1}.gdb-index ]
    then
        $TOOLCHAIN_ROOT/bin/objcopy --add-section .gdb_index=\${1}.gdb-index --set-section-flags .gdb_index=readonly \${1} \${1}
        rm -f \${1}.gdb-index
    fi

else
    echo "Can't determine debug info for \$1"
fi
EOF

    # After creating the index file in a separate debug file, the debuglink CRC
    # is no longer value, this will simply recreate the debuglink and therefore
    # update the CRC to match.
    cat > recalc_debuglink.sh <<EOF
#!/bin/bash

set -o pipefail

debuglink="\$($TOOLCHAIN_ROOT/bin/eu-readelf -S \$1 | grep '.gnu_debuglink')"

if [ ! -z "\$debuglink" ]
then
    $TOOLCHAIN_ROOT/bin/objcopy --remove-section ".gnu_debuglink" \$1
	$TOOLCHAIN_ROOT/bin/objcopy --add-gnu-debuglink "$(basename \$1).debug" \$1
fi
EOF

# this script creates a symlink in the toolchain lib/debug directory which is in the
# build-id format. This allows gdb to load the separate debug file and skip CRC
# checking.
cat > create_build_id_links.sh <<EOF
#!/bin/bash

set -o pipefail

build_id="\$($TOOLCHAIN_ROOT/bin/eu-readelf -n \$1 | grep 'Build ID:' | awk -F: '{print \$2}' | sed 's/ *//')"
gdb_debug_dir="\$(readlink $TOOLCHAIN_ROOT/bin/gdb)"
gdb_debug_dir="\$(dirname \$gdb_debug_dir)"
gdb_debug_dir="\$(dirname \$gdb_debug_dir)/lib/debug/.build-id/\${build_id:0:2}"
gdb_debug_file="\${build_id:2}.debug"
mkdir -p \$gdb_debug_dir
ln -s \$PWD/\$1 \$gdb_debug_dir/\$gdb_debug_file
EOF

    chmod +x ./add_index.sh
    chmod +x ./recalc_debuglink.sh
    chmod +x ./create_build_id_links.sh
    cpus=$(getconf _NPROCESSORS_ONLN)

    find bin lib -type f -perm -o=x | xargs --max-args=1 --max-procs=$cpus ./add_index.sh
    find bin lib -type f -perm -o=x | xargs --max-args=1 --max-procs=$cpus ./recalc_debuglink.sh

    # This script constructs symblinks based off the build-id so GDB can skip the crc check
    # normally performed during .gnu_debuglink loading.
    find bin lib -name "*.debug" -type f -perm -o=x | xargs --max-args=1 --max-procs=$cpus ./create_build_id_links.sh

    # Boost-Pretty-Printer supports auto-detection for the boost version but relies on the system
    # installed version of boost. To avoid this behavior we explicitly specify the boost_version.
    # Moreover, the most recent version of boost that Boost-Pretty-Printer verifies it supports is
    # version 1.73.0. While 1.73.0 is older than the version in the src/third_party/boost/
    # directory, the pretty printer in Boost-Pretty-Printer will display boost::optional values
    # correctly.
    cat >> ~/.gdbinit <<EOF
set auto-load safe-path /
set solib-search-path ./lib/
set pagination off
set print object on
set print static-members off
set print pretty on

python
import sys
sys.path.insert(0, './Boost-Pretty-Printer')
import boost
boost.register_printers(boost_version=(1, 73, 0))
end
dir $HOME/debug
EOF

    # Empty out the progress script that warns users about the set script still running when users log in.
    echo "" > ~/.setup_spawnhost_coredump_progress
    # Alert currently logged in users that this setup script has completed. Logging back in will ensure any
    # paths/environment variables will be set as intended.
    wall "The setup_spawnhost_coredump script has completed, please relogin to ensure the right environment variables are set."
fi

# Send a Slack notification as the very last thing the setup_spawnhost_coredump script does.
# This way a Server engineer can temporarily forget about the Evergreen host they spawned until the
# paths and environment variables are configured as intended for when they first connect.
if [[ "${machine}" = "Cygwin" ]]; then
    # The setup_spawnhost_coredump script runs as the mci-exec user on Windows hosts. However,
    # Server engineers log in as the Administrator user.
    ssh_user="Administrator"
    # The Evergreen binary only expects a Windows path. The rest of Cygwin is flexible about it
    # being a Cygwin path or a Windows path so we do the conversion here.
    evg_credentials_pathname=$(cygpath -w ~Administrator/.evergreen.yml)
    evg_binary_pathname=~Administrator/cli_bin/evergreen
else
    ssh_user=$(whoami)
    evg_credentials_pathname=~/.evergreen.yml
    evg_binary_pathname=evergreen
fi

slack_user=$(awk '{if ($1 == "user:") print $2}' "$evg_credentials_pathname")
# Refer to the https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
# documentation for more information on the AWS instance metadata endpoints.
aws_metadata_svc="http://169.254.169.254"
aws_token=$(curl -s -X PUT "$aws_metadata_svc/latest/api/token" -H 'X-aws-ec2-metadata-token-ttl-seconds: 60')
ssh_host=$(curl -s -H "X-aws-ec2-metadata-token: $aws_token" "$aws_metadata_svc/latest/meta-data/public-hostname")
if [[ "${machine}" = "Cygwin" ]]; then
    slack_message=$(printf "The setup_spawnhost_coredump script has finished setting things up. \
Please use Windows Remote Desktop with\n\
1. PC name: $ssh_host\n\
2. User account: $ssh_user\n\
3. The RDP password configured under the edit dialog at https://spruce.mongodb.com/spawn/host\n\
to log in.")
else
    slack_message="The setup_spawnhost_coredump script has finished setting things up. Please run "'```'"ssh $ssh_user@$ssh_host"'```'" to log in."
fi

# The Evergreen spawn host is expected to be provisioned with the user's .evergreen.yml credentials.
# But in case something unexpected happens we don't want the setup_spawnhost_coredump script itself
# to error.
if [[ -n "${slack_user}" ]]; then
    "$evg_binary_pathname" --config "$evg_credentials_pathname" notify slack -t "@$slack_user" -m "$slack_message"
fi