diff options
author | Julius Goryavsky <julius.goryavsky@mariadb.com> | 2022-06-08 15:36:28 +0200 |
---|---|---|
committer | Julius Goryavsky <julius.goryavsky@mariadb.com> | 2022-06-14 12:29:14 +0200 |
commit | 124326d8108623e36f8e0a4a7c15774beed18015 (patch) | |
tree | 944edd5ebcd5079e7700982a331c839f54b9eaca /scripts | |
parent | c168e16782fc449f61412e5afc1c01d59b77c675 (diff) | |
download | mariadb-git-124326d8108623e36f8e0a4a7c15774beed18015.tar.gz |
MDEV-28656: Inability to roll upgrade without stopping the Galera cluster
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/wsrep_sst_backup.sh | 3 | ||||
-rw-r--r-- | scripts/wsrep_sst_common.sh | 13 | ||||
-rw-r--r-- | scripts/wsrep_sst_mariabackup.sh | 221 | ||||
-rw-r--r-- | scripts/wsrep_sst_rsync.sh | 56 |
4 files changed, 188 insertions, 105 deletions
diff --git a/scripts/wsrep_sst_backup.sh b/scripts/wsrep_sst_backup.sh index 301739905b6..4f98ad41dd5 100644 --- a/scripts/wsrep_sst_backup.sh +++ b/scripts/wsrep_sst_backup.sh @@ -72,7 +72,7 @@ then # (c) ERROR file, in case flush tables operation failed. while [ ! -r "$FLUSHED" ] && \ - ! grep -q -F ':' -- "$FLUSHED" >/dev/null 2>&1 + ! grep -q -F ':' -- "$FLUSHED" do # Check whether ERROR file exists. if [ -f "$ERROR" ]; then @@ -105,4 +105,5 @@ else # joiner fi +wsrep_log_info "$WSREP_METHOD $WSREP_TRANSFER_TYPE completed on $WSREP_SST_OPT_ROLE" exit 0 diff --git a/scripts/wsrep_sst_common.sh b/scripts/wsrep_sst_common.sh index 3d0a132f3fc..35021ace8b6 100644 --- a/scripts/wsrep_sst_common.sh +++ b/scripts/wsrep_sst_common.sh @@ -80,6 +80,7 @@ to_minuses() } WSREP_SST_OPT_BYPASS=0 +WSREP_SST_OPT_PROGRESS=0 WSREP_SST_OPT_BINLOG="" WSREP_SST_OPT_BINLOG_INDEX="" WSREP_SST_OPT_LOG_BASENAME="" @@ -187,6 +188,10 @@ case "$1" in '--bypass') readonly WSREP_SST_OPT_BYPASS=1 ;; + '--progress') + readonly WSREP_SST_OPT_PROGRESS=$(( $2 )) + shift + ;; '--datadir') # Let's remove the trailing slash: readonly WSREP_SST_OPT_DATA=$(trim_dir "$2") @@ -246,11 +251,11 @@ case "$1" in shift ;; '--local-port') - readonly WSREP_SST_OPT_LPORT="$2" + readonly WSREP_SST_OPT_LPORT=$(( $2 )) shift ;; '--parent') - readonly WSREP_SST_OPT_PARENT="$2" + readonly WSREP_SST_OPT_PARENT=$(( $2 )) shift ;; '--password') @@ -258,7 +263,7 @@ case "$1" in shift ;; '--port') - readonly WSREP_SST_OPT_PORT="$2" + readonly WSREP_SST_OPT_PORT=$(( $2 )) shift ;; '--role') @@ -531,6 +536,8 @@ else readonly WSREP_SST_OPT_ROLE='donor' fi +readonly WSREP_SST_OPT_PROGRESS + # The same argument can be present on the command line several # times, in this case we must take its last value: if [ -n "${MYSQLD_OPT_INNODB_DATA_HOME_DIR:-}" -a \ diff --git a/scripts/wsrep_sst_mariabackup.sh b/scripts/wsrep_sst_mariabackup.sh index a66a792b5ea..8e025f94a3e 100644 --- a/scripts/wsrep_sst_mariabackup.sh +++ b/scripts/wsrep_sst_mariabackup.sh @@ -86,15 +86,12 @@ encrypt_threads="" encrypt_chunk="" readonly SECRET_TAG='secret' +readonly TOTAL_TAG='total' # Required for backup locks # For backup locks it is 1 sent by joiner sst_ver=1 -if [ -n "$(commandex pv)" ] && pv --help | grep -qw -F -- '-F'; then - pvopts="$pvopts $pvformat" -fi -pcmd="pv $pvopts" declare -a RC BACKUP_BIN=$(commandex 'mariabackup') @@ -121,18 +118,19 @@ timeit() if [ $ttime -eq 1 ]; then x1=$(date +%s) - wsrep_log_info "Evaluating $cmd" - eval "$cmd" - extcode=$? + fi + + wsrep_log_info "Evaluating $cmd" + eval $cmd + extcode=$? + + if [ $ttime -eq 1 ]; then x2=$(date +%s) took=$(( x2-x1 )) wsrep_log_info "NOTE: $stage took $took seconds" totime=$(( totime+took )) - else - wsrep_log_info "Evaluating $cmd" - eval "$cmd" - extcode=$? fi + return $extcode } @@ -419,44 +417,90 @@ get_transfer() get_footprint() { cd "$DATA_DIR" - payload=$(find . -regex '.*\.ibd$\|.*\.MYI$\|.*\.MYD$\|.*ibdata1$' \ - -type f -print0 | du --files0-from=- --block-size=1 -c -s | \ - awk 'END { print $1 }') + local payload_data=$(find . \ + -regex '.*undo[0-9]+$\|.*\.ibd$\|.*\.MYI$\|.*\.MYD$\|.*ibdata1$' \ + -type f -print0 | du --files0-from=- --block-size=1 -c -s | \ + awk 'END { print $1 }') + + local payload_undo=0 + if [ -n "$ib_undo_dir" -a -d "$ib_undo_dir" ]; then + cd "$ib_undo_dir" + payload_undo=$(find . -regex '.*undo[0-9]+$' -type f -print0 | \ + du --files0-from=- --block-size=1 -c -s | awk 'END { print $1 }') + fi + cd "$OLD_PWD" + + wsrep_log_info \ + "SST footprint estimate: data: $payload_data, undo: $payload_undo" + + payload=$(( payload_data + payload_undo )) + if [ "$compress" != 'none' ]; then # QuickLZ has around 50% compression ratio # When compression/compaction used, the progress is only an approximate. payload=$(( payload*1/2 )) fi - cd "$OLD_PWD" - pcmd="$pcmd -s $payload" + + if [ $WSREP_SST_OPT_PROGRESS -eq 1 ]; then + # report to parent the total footprint of the SST + echo "$TOTAL_TAG $payload" + fi + adjust_progress } adjust_progress() { - if [ -z "$(commandex pv)" ]; then - wsrep_log_error "pv not found in path: $PATH" - wsrep_log_error "Disabling all progress/rate-limiting" - pcmd="" - rlimit="" - progress="" - return - fi + pcmd="" + rcmd="" - if [ -n "$progress" -a "$progress" != '1' ]; then - if [ -e "$progress" ]; then - pcmd="$pcmd 2>>'$progress'" - else - pcmd="$pcmd 2>'$progress'" - fi - elif [ -z "$progress" -a -n "$rlimit" ]; then - # When rlimit is non-zero - pcmd='pv -q' - fi + [ "$progress" = 'none' ] && return + rlimitopts="" if [ -n "$rlimit" -a "$WSREP_SST_OPT_ROLE" = 'donor' ]; then wsrep_log_info "Rate-limiting SST to $rlimit" - pcmd="$pcmd -L \$rlimit" + rlimitopts=" -L $rlimit" + fi + + if [ -n "$progress" ]; then + + # Backward compatibility: user-configured progress output + pcmd="pv $pvopts$rlimitopts" + + if [ -z "${PV_FORMAT+x}" ]; then + PV_FORMAT=0 + pv --help | grep -qw -F -- '-F' && PV_FORMAT=1 + fi + if [ $PV_FORMAT -eq 1 ]; then + pcmd="$pcmd $pvformat" + fi + + if [ $payload -ne 0 ]; then + pcmd="$pcmd -s $payload" + fi + + if [ "$progress" != '1' ]; then + if [ -e "$progress" ]; then + pcmd="$pcmd 2>>'$progress'" + else + pcmd="$pcmd 2>'$progress'" + fi + fi + + elif [ $WSREP_SST_OPT_PROGRESS -eq 1 ]; then + + # Default progress output parseable by parent + pcmd="pv -f -i 1 -n -b$rlimitopts" + + # read progress data, add tag and post to stdout + # for the parent + rcmd="stdbuf -oL tr '\r' '\n' | xargs -n1 echo complete" + + elif [ -n "$rlimitopts" ]; then + + # Rate-limiting only, when rlimit is non-zero + pcmd="pv -q$rlimitopts" + fi } @@ -765,18 +809,28 @@ recv_joiner() wsrep_log_info $(ls -l "$dir/"*) exit 32 fi - # Select the "secret" tag whose value does not start - # with a slash symbol. All new tags must to start with - # the space and the slash symbol after the word "secret" - - # to be removed by older versions of the SST scripts: - SECRET=$(grep -m1 -E "^$SECRET_TAG[[:space:]]+[^/]" \ - -- "$MAGIC_FILE" || :) - # Check donor supplied secret: - SECRET=$(trim_string "${SECRET#$SECRET_TAG}") - if [ "$SECRET" != "$MY_SECRET" ]; then - wsrep_log_error "Donor does not know my secret!" - wsrep_log_info "Donor: '$SECRET', my: '$MY_SECRET'" - exit 32 + + if [ -n "$MY_SECRET" ]; then + # Check donor supplied secret: + SECRET=$(grep -m1 -E "^$SECRET_TAG[[:space:]]" "$MAGIC_FILE" || :) + SECRET=$(trim_string "${SECRET#$SECRET_TAG}") + if [ "$SECRET" != "$MY_SECRET" ]; then + wsrep_log_error "Donor does not know my secret!" + wsrep_log_info "Donor: '$SECRET', my: '$MY_SECRET'" + exit 32 + fi + fi + + if [ $WSREP_SST_OPT_PROGRESS -eq 1 ]; then + # check total SST footprint + payload=$(grep -m1 -E "^$TOTAL_TAG[[:space:]]" "$MAGIC_FILE" || :) + if [ -n "$payload" ]; then + payload=$(trim_string "${payload#$TOTAL_TAG}") + if [ $payload -ge 0 ]; then + # report to parent + echo "$TOTAL_TAG $payload" + fi + fi fi fi } @@ -825,6 +879,14 @@ monitor_process() read_cnf setup_ports +if [ "$progress" = 'none' ]; then + wsrep_log_info "All progress/rate-limiting disabled by configuration" +elif [ -z "$(commandex pv)" ]; then + wsrep_log_info "Progress reporting tool pv not found in path: $PATH" + wsrep_log_info "Disabling all progress/rate-limiting" + progress='none' +fi + if "$BACKUP_BIN" --help 2>/dev/null | grep -qw -F -- '--version-check'; then disver=' --no-version-check' fi @@ -980,6 +1042,14 @@ if [ "$WSREP_SST_OPT_ROLE" = 'donor' ]; then check_extra + if [ -n "$progress" -o $WSREP_SST_OPT_PROGRESS -eq 1 ]; then + wsrep_log_info "Estimating total transfer size" + get_footprint + wsrep_log_info "To transfer: $payload" + else + adjust_progress + fi + wsrep_log_info "Streaming GTID file before SST" # Store donor's wsrep GTID (state ID) and wsrep_gtid_domain_id @@ -991,6 +1061,11 @@ if [ "$WSREP_SST_OPT_ROLE" = 'donor' ]; then echo "$SECRET_TAG $WSREP_SST_OPT_REMOTE_PSWD" >> "$MAGIC_FILE" fi + if [ $WSREP_SST_OPT_PROGRESS -eq 1 ]; then + # Tell joiner what to expect: + echo "$TOTAL_TAG $payload" >> "$MAGIC_FILE" + fi + ttcmd="$tcmd" if [ -n "$scomp" ]; then @@ -1007,12 +1082,14 @@ if [ "$WSREP_SST_OPT_ROLE" = 'donor' ]; then # Restore the transport commmand to its original state tcmd="$ttcmd" - if [ -n "$progress" ]; then - get_footprint - tcmd="$pcmd | $tcmd" - elif [ -n "$rlimit" ]; then - adjust_progress - tcmd="$pcmd | $tcmd" + if [ -n "$pcmd" ]; then + if [ -n "$rcmd" ]; then + # redirect pv stderr to rcmd for tagging and output to parent + tcmd="{ $pcmd 2>&3 | $tcmd; } 3>&1 | $rcmd" + else + # use user-configured pv output + tcmd="$pcmd | $tcmd" + fi fi wsrep_log_info "Sleeping before data transfer for SST" @@ -1214,13 +1291,6 @@ else # joiner MY_SECRET="" # for check down in recv_joiner() fi - trap cleanup_at_exit EXIT - - if [ -n "$progress" ]; then - adjust_progress - tcmd="$tcmd | $pcmd" - fi - get_keys if [ $encrypt -eq 1 ]; then strmcmd="$ecmd | $strmcmd" @@ -1232,6 +1302,8 @@ else # joiner check_sockets_utils + trap cleanup_at_exit EXIT + STATDIR="$(mktemp -d)" MAGIC_FILE="$STATDIR/$INFO_FILE" @@ -1245,6 +1317,17 @@ else # joiner if [ ! -r "$STATDIR/$IST_FILE" ]; then + adjust_progress + if [ -n "$pcmd" ]; then + if [ -n "$rcmd" ]; then + # redirect pv stderr to rcmd for tagging and output to parent + strmcmd="{ $pcmd 2>&3 | $strmcmd; } 3>&1 | $rcmd" + else + # use user-configured pv output + strmcmd="$pcmd | $strmcmd" + fi + fi + if [ -d "$DATA/.sst" ]; then wsrep_log_info \ "WARNING: Stale temporary SST directory:" \ @@ -1265,13 +1348,13 @@ else # joiner cd "$DATA" wsrep_log_info "Cleaning the old binary logs" # If there is a file with binlogs state, delete it: - [ -f "$binlog_base.state" ] && rm -f "$binlog_base.state" >&2 + [ -f "$binlog_base.state" ] && rm "$binlog_base.state" >&2 # Clean up the old binlog files and index: if [ -f "$binlog_index" ]; then while read bin_file || [ -n "$bin_file" ]; do rm -f "$bin_file" >&2 || : done < "$binlog_index" - rm -f "$binlog_index" >&2 + rm "$binlog_index" >&2 fi if [ -n "$binlog_dir" -a "$binlog_dir" != '.' -a \ -d "$binlog_dir" ] @@ -1335,16 +1418,14 @@ else # joiner dcmd="xargs -n 2 qpress -dT$nproc" - if [ -n "$progress" ] && \ + if [ -n "$progress" -a "$progress" != 'none' ] && \ pv --help | grep -qw -F -- '--line-mode' then - count=$(find "$DATA" -type f -name '*.qp' | wc -l) + count=$(find "$DATA" -maxdepth 1 -type f -name '*.qp' | wc -l) count=$(( count*2 )) - pvopts="-f -s $count -l -N Decompression" - if pv --help | grep -qw -F -- '-F'; then - pvopts="$pvopts -F '%N => Rate:%r Elapsed:%t %e Progress: [%b/$count]'" - fi - pcmd="pv $pvopts" + pvopts='-f -l -N Decompression' + pvformat="-F '%N => Rate:%r Elapsed:%t %e Progress: [%b/$count]'" + payload=$count adjust_progress dcmd="$pcmd | $dcmd" fi @@ -1442,7 +1523,7 @@ else # joiner fi # Remove special tags from the magic file, and from the output: - coords=$(grep -v -E "^$SECRET_TAG[[:space:]]" -- "$MAGIC_FILE") + coords=$(head -n1 "$MAGIC_FILE") wsrep_log_info "Galera co-ords from recovery: $coords" echo "$coords" # Output : UUID:seqno wsrep_gtid_domain_id diff --git a/scripts/wsrep_sst_rsync.sh b/scripts/wsrep_sst_rsync.sh index 994347d6f73..7096bb4b330 100644 --- a/scripts/wsrep_sst_rsync.sh +++ b/scripts/wsrep_sst_rsync.sh @@ -65,21 +65,21 @@ cleanup_joiner() if [ $failure -eq 0 ]; then if cleanup_pid $RSYNC_REAL_PID "$RSYNC_PID" "$RSYNC_CONF"; then - [ -f "$MAGIC_FILE" ] && rm -f "$MAGIC_FILE" - [ -f "$BINLOG_TAR_FILE" ] && rm -f "$BINLOG_TAR_FILE" + [ -f "$MAGIC_FILE" ] && rm -f "$MAGIC_FILE" || : + [ -f "$BINLOG_TAR_FILE" ] && rm -f "$BINLOG_TAR_FILE" || : else wsrep_log_warning "rsync cleanup failed." fi fi - wsrep_log_info "Joiner cleanup done." - if [ "$WSREP_SST_OPT_ROLE" = 'joiner' ]; then wsrep_cleanup_progress_file fi [ -f "$SST_PID" ] && rm -f "$SST_PID" || : + wsrep_log_info "Joiner cleanup done." + exit $estatus } @@ -318,7 +318,7 @@ if [ -n "$SSLMODE" -a "$SSLMODE" != 'DISABLED' ]; then fi readonly SECRET_TAG='secret' -readonly BYPASS_TAG='secret /bypass' +readonly BYPASS_TAG='bypass' SST_PID="$WSREP_SST_OPT_DATA/wsrep_sst.pid" @@ -371,10 +371,11 @@ done [ -f "$MAGIC_FILE" ] && rm -f "$MAGIC_FILE" [ -f "$BINLOG_TAR_FILE" ] && rm -f "$BINLOG_TAR_FILE" +RC=0 + if [ "$WSREP_SST_OPT_ROLE" = 'donor' ]; then - if [ -n "$STUNNEL" ] - then + if [ -n "$STUNNEL" ]; then cat << EOF > "$STUNNEL_CONF" key = $SSTKEY cert = $SSTCERT @@ -392,8 +393,6 @@ ${CHECK_OPT_LOCAL} EOF fi - RC=0 - if [ $WSREP_SST_OPT_BYPASS -eq 0 ]; then FLUSHED="$WSREP_SST_OPT_DATA/tables_flushed" @@ -410,19 +409,19 @@ EOF # (c) ERROR file, in case flush tables operation failed. while [ ! -r "$FLUSHED" ] && \ - ! grep -q -F ':' -- "$FLUSHED" 2>/dev/null + ! grep -q -F ':' -- "$FLUSHED" do # Check whether ERROR file exists. if [ -f "$ERROR" ]; then # Flush tables operation failed. - rm -f "$ERROR" + rm "$ERROR" exit 255 fi sleep 0.2 done STATE=$(cat "$FLUSHED") - rm -f "$FLUSHED" + rm "$FLUSHED" sync @@ -629,6 +628,8 @@ FILTER="-f '- /lost+found' wsrep_log_info "Transfer of data done" + [ -f "$BINLOG_TAR_FILE" ] && rm "$BINLOG_TAR_FILE" + else # BYPASS wsrep_log_info "Bypassing state dump." @@ -657,6 +658,8 @@ FILTER="-f '- /lost+found' --archive --quiet --checksum "$MAGIC_FILE" \ "rsync://$WSREP_SST_OPT_ADDR" >&2 || RC=$? + rm "$MAGIC_FILE" + if [ $RC -ne 0 ]; then wsrep_log_error "rsync $MAGIC_FILE returned code $RC:" exit 255 # unknown error @@ -665,8 +668,8 @@ FILTER="-f '- /lost+found' echo "done $STATE" if [ -n "$STUNNEL" ]; then - [ -f "$STUNNEL_CONF" ] && rm -f "$STUNNEL_CONF" - [ -f "$STUNNEL_PID" ] && rm -f "$STUNNEL_PID" + rm "$STUNNEL_CONF" + [ -f "$STUNNEL_PID" ] && rm "$STUNNEL_PID" fi else # joiner @@ -704,8 +707,7 @@ $SILENT EOF # If the IP is local, listen only on it: - if is_local_ip "$RSYNC_ADDR_UNESCAPED" - then + if is_local_ip "$RSYNC_ADDR_UNESCAPED"; then RSYNC_EXTRA_ARGS="--address $RSYNC_ADDR_UNESCAPED" STUNNEL_ACCEPT="$RSYNC_ADDR_UNESCAPED:$RSYNC_PORT" else @@ -826,13 +828,8 @@ EOF fi if [ -n "$MY_SECRET" ]; then - # Select the "secret" tag whose value does not start - # with a slash symbol. All new tags must to start with - # the space and the slash symbol after the word "secret" - - # to be removed by older versions of the SST scripts: - SECRET=$(grep -m1 -E "^$SECRET_TAG[[:space:]]+[^/]" \ - -- "$MAGIC_FILE" || :) # Check donor supplied secret: + SECRET=$(grep -m1 -E "^$SECRET_TAG[[:space:]]" "$MAGIC_FILE" || :) SECRET=$(trim_string "${SECRET#$SECRET_TAG}") if [ "$SECRET" != "$MY_SECRET" ]; then wsrep_log_error "Donor does not know my secret!" @@ -842,7 +839,7 @@ EOF fi if [ $WSREP_SST_OPT_BYPASS -eq 0 ]; then - if grep -m1 -qE "^$BYPASS_TAG([[:space:]]+.*)?\$" -- "$MAGIC_FILE"; then + if grep -m1 -qE "^$BYPASS_TAG([[:space:]]+.*)?\$" "$MAGIC_FILE"; then readonly WSREP_SST_OPT_BYPASS=1 readonly WSREP_TRANSFER_TYPE='IST' fi @@ -850,10 +847,10 @@ EOF binlog_tar_present=0 if [ -f "$BINLOG_TAR_FILE" ]; then + binlog_tar_present=1 if [ $WSREP_SST_OPT_BYPASS -ne 0 ]; then wsrep_log_warning "tar with binlogs transferred in the IST mode" fi - binlog_tar_present=1 fi if [ $WSREP_SST_OPT_BYPASS -eq 0 -a -n "$WSREP_SST_OPT_BINLOG" ]; then @@ -867,7 +864,7 @@ EOF while read bin_file || [ -n "$bin_file" ]; do rm -f "$bin_file" || : done < "$binlog_index" - rm -f "$binlog_index" + rm "$binlog_index" fi binlog_cd=0 # Change the directory to binlog base (if possible): @@ -902,7 +899,6 @@ EOF fi # Extracting binlog files: wsrep_log_info "Extracting binlog files:" - RC=0 if tar --version | grep -qw -E '^bsdtar'; then tar -tf "$BINLOG_TAR_FILE" > "$tmpfile" && \ tar -xvf "$BINLOG_TAR_FILE" > /dev/null || RC=$? @@ -912,7 +908,7 @@ EOF fi if [ $RC -ne 0 ]; then wsrep_log_error "Error unpacking tar file with binlog files" - rm -f "$tmpfile" + rm "$tmpfile" exit 32 fi # Rebuild binlog index: @@ -920,18 +916,16 @@ EOF while read bin_file || [ -n "$bin_file" ]; do echo "$binlog_dir${binlog_dir:+/}$bin_file" >> "$binlog_index" done < "$tmpfile" - rm -f "$tmpfile" + rm "$tmpfile" cd "$OLD_PWD" fi fi # Remove special tags from the magic file, and from the output: - coords=$(grep -v -E "^$SECRET_TAG[[:space:]]" -- "$MAGIC_FILE") + coords=$(head -n1 "$MAGIC_FILE") wsrep_log_info "Galera co-ords from recovery: $coords" echo "$coords" # Output : UUID:seqno wsrep_gtid_domain_id fi -[ -f "$BINLOG_TAR_FILE" ] && rm -f "$BINLOG_TAR_FILE" - wsrep_log_info "$WSREP_METHOD $WSREP_TRANSFER_TYPE completed on $WSREP_SST_OPT_ROLE" exit 0 |