diff options
author | Julius Goryavsky <julius.goryavsky@mariadb.com> | 2021-06-15 06:24:38 +0200 |
---|---|---|
committer | Julius Goryavsky <julius.goryavsky@mariadb.com> | 2021-06-15 14:27:22 +0200 |
commit | 2edb8e12e10179b970007b3e1d5c465b9d0e110e (patch) | |
tree | 6e183202d77d8b2c370b06b89f9395578a457513 /scripts | |
parent | 18d5be5b54b1a05e6107a1c5828d9eed9cf18636 (diff) | |
download | mariadb-git-2edb8e12e10179b970007b3e1d5c465b9d0e110e.tar.gz |
MDEV-25880 part 2: Improving reliability of the SST scripts
Additional improvements aimed at improving operational
reliability of the SST scripts:
1) Script need to give rsync and stunnel a short time to
terminate after "kill -9" before the first PID check
using ps utility;
2) The temporary file used to create the binlog index could
sometimes remain in the data directory if tar failed and
then may be reused without being cleaned up (the next
time when SST was run) - now it's fixed;
3) The temporary file used to build the binlog index is now
created using mktemp and, if this variable is present in
the configuration file, in tmpdir;
4) Checking the secret tag in SST via rsync is made faster
and does not require creating a temporary file, which
could remain in the data directory in case of failure;
5) Added "-F" option to grep to check the tag when using
mariabackup/xtrabackup-v2 - to avoid possible collisions
in case of special characters in the tag value (unlikely
scenario, but the new check is more reliable).
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/wsrep_sst_common.sh | 7 | ||||
-rw-r--r-- | scripts/wsrep_sst_mariabackup.sh | 6 | ||||
-rw-r--r-- | scripts/wsrep_sst_rsync.sh | 64 | ||||
-rw-r--r-- | scripts/wsrep_sst_xtrabackup-v2.sh | 6 |
4 files changed, 49 insertions, 34 deletions
diff --git a/scripts/wsrep_sst_common.sh b/scripts/wsrep_sst_common.sh index c2f31b2818d..4dedecb439f 100644 --- a/scripts/wsrep_sst_common.sh +++ b/scripts/wsrep_sst_common.sh @@ -1223,7 +1223,7 @@ check_pid() # cleanup_pid() { - local pid="$1" + local pid=$1 local pid_file="${2:-}" local config="${3:-}" @@ -1241,8 +1241,9 @@ cleanup_pid() round=8 force=1 kill -9 $pid >/dev/null 2>&1 + sleep 0.5 else - return 1; + return 1 fi fi done @@ -1254,7 +1255,7 @@ cleanup_pid() fi [ -n "$pid_file" ] && [ -f "$pid_file" ] && rm -f "$pid_file" - [ -n "$config" ] && [ -f "$config" ] && rm -f "$config" + [ -n "$config" ] && [ -f "$config" ] && rm -f "$config" return 0 } diff --git a/scripts/wsrep_sst_mariabackup.sh b/scripts/wsrep_sst_mariabackup.sh index 7f97d9e8dea..339a8fcf4a5 100644 --- a/scripts/wsrep_sst_mariabackup.sh +++ b/scripts/wsrep_sst_mariabackup.sh @@ -741,15 +741,15 @@ recv_joiner() fi # check donor supplied secret - SECRET=$(grep -- "$SECRET_TAG " "$MAGIC_FILE" 2>/dev/null | cut -d ' ' -f 2) + SECRET=$(grep -F -- "$SECRET_TAG " "$MAGIC_FILE" 2>/dev/null | cut -d ' ' -f 2) if [ "$SECRET" != "$MY_SECRET" ]; then wsrep_log_error "Donor does not know my secret!" wsrep_log_info "Donor:'$SECRET', my:'$MY_SECRET'" exit 32 fi - # remove secret from magic file - grep -v -- "$SECRET_TAG " "$MAGIC_FILE" > "$MAGIC_FILE.new" + # remove secret from the magic file + grep -v -F -- "$SECRET_TAG " "$MAGIC_FILE" > "$MAGIC_FILE.new" mv "$MAGIC_FILE.new" "$MAGIC_FILE" fi } diff --git a/scripts/wsrep_sst_rsync.sh b/scripts/wsrep_sst_rsync.sh index a602af79af0..fc9f5017937 100644 --- a/scripts/wsrep_sst_rsync.sh +++ b/scripts/wsrep_sst_rsync.sh @@ -429,7 +429,7 @@ EOF exit 255 # unknown error fi - # second, we transfer InnoDB log files + # second, we transfer InnoDB and Aria log files rsync ${STUNNEL:+--rsh="$STUNNEL"} \ --owner --group --perms --links --specials \ --ignore-times --inplace --dirs --delete --quiet \ @@ -504,20 +504,20 @@ then SST_PID="$WSREP_SST_OPT_DATA/wsrep_rsync_sst.pid" - # give some time for lingering stunnel from previous SST to complete + # give some time for previous SST to complete: check_round=0 while check_pid "$SST_PID" 0 do - wsrep_log_info "previous SST not completed, waiting for it to exit" + wsrep_log_info "previous SST is not completed, waiting for it to exit" check_round=$(( check_round + 1 )) if [ $check_round -eq 10 ]; then - wsrep_log_error "SST script already running." + wsrep_log_error "previous SST script still running." exit 114 # EALREADY fi sleep 1 done - # give some time for lingering stunnel from previous SST to complete + # give some time for stunnel from the previous SST to complete: check_round=0 while check_pid "$STUNNEL_PID" 1 do @@ -534,7 +534,7 @@ then RSYNC_PID="$WSREP_SST_OPT_DATA/$MODULE.pid" RSYNC_CONF="$WSREP_SST_OPT_DATA/$MODULE.conf" - # give some time for lingering rsync from previous SST to complete + # give some time for rsync from the previous SST to complete: check_round=0 while check_pid "$RSYNC_PID" 1 do @@ -711,35 +711,49 @@ EOF # Clean up old binlog files first rm -f "$BINLOG_FILENAME".[0-9]* - [ -f "$binlog_index" ] && rm "$binlog_index" + [ -f "$binlog_index" ] && rm -f "$binlog_index" + + # Create a temporary file: + tmpdir=$(parse_cnf '--mysqld|sst' 'tmpdir') + if [ -z "$tmpdir" ]; then + tmpfile="$(mktemp)" + else + tmpfile=$(mktemp "--tmpdir=$tmpdir") + fi wsrep_log_info "Extracting binlog files:" - tar -xvf "$BINLOG_TAR_FILE" >> _binlog_tmp_files_$! + if ! tar -xvf "$BINLOG_TAR_FILE" > "$tmpfile"; then + wsrep_log_error "Error unpacking tar file with binlog files" + rm -f "$tmpfile" + exit 32 + fi + + # Rebuild binlog index: while read bin_file; do echo "$BINLOG_DIRNAME/$bin_file" >> "$binlog_index" - done < _binlog_tmp_files_$! - rm -f _binlog_tmp_files_$! + done < "$tmpfile" + rm -f "$tmpfile" cd "$OLD_PWD" fi fi - if [ -r "$MAGIC_FILE" ] - then - # check donor supplied secret - SECRET=$(grep -F -- "$SECRET_TAG " "$MAGIC_FILE" 2>/dev/null | cut -d ' ' -f 2) - if [ "$SECRET" != "$MY_SECRET" ]; then - wsrep_log_error "Donor does not know my secret!" - wsrep_log_info "Donor:'$SECRET', my:'$MY_SECRET'" - exit 32 + if [ -r "$MAGIC_FILE" ]; then + if [ -n "$MY_SECRET" ]; then + # check donor supplied secret + SECRET=$(grep -F -- "$SECRET_TAG " "$MAGIC_FILE" 2>/dev/null | cut -d ' ' -f 2) + if [ "$SECRET" != "$MY_SECRET" ]; then + wsrep_log_error "Donor does not know my secret!" + wsrep_log_info "Donor:'$SECRET', my:'$MY_SECRET'" + exit 32 + fi + # remove secret from the magic file, and output + # the UUID:seqno & wsrep_gtid_domain_id: + grep -v -F -- "$SECRET_TAG " "$MAGIC_FILE" + else + # Output the UUID:seqno and wsrep_gtid_domain_id: + cat "$MAGIC_FILE" fi - - # remove secret from magic file - grep -v -F -- "$SECRET_TAG " "$MAGIC_FILE" > "$MAGIC_FILE.new" - - mv "$MAGIC_FILE.new" "$MAGIC_FILE" - # UUID:seqno & wsrep_gtid_domain_id is received here. - cat "$MAGIC_FILE" # Output : UUID:seqno wsrep_gtid_domain_id else # this message should cause joiner to abort echo "rsync process ended without creating '$MAGIC_FILE'" diff --git a/scripts/wsrep_sst_xtrabackup-v2.sh b/scripts/wsrep_sst_xtrabackup-v2.sh index 24bff12219d..d76dc346a82 100644 --- a/scripts/wsrep_sst_xtrabackup-v2.sh +++ b/scripts/wsrep_sst_xtrabackup-v2.sh @@ -750,15 +750,15 @@ recv_joiner() fi # check donor supplied secret - SECRET=$(grep -- "$SECRET_TAG " "$MAGIC_FILE" 2>/dev/null | cut -d ' ' -f 2) + SECRET=$(grep -F -- "$SECRET_TAG " "$MAGIC_FILE" 2>/dev/null | cut -d ' ' -f 2) if [ "$SECRET" != "$MY_SECRET" ]; then wsrep_log_error "Donor does not know my secret!" wsrep_log_info "Donor:'$SECRET', my:'$MY_SECRET'" exit 32 fi - # remove secret from magic file - grep -v -- "$SECRET_TAG " "$MAGIC_FILE" > "$MAGIC_FILE.new" + # remove secret from the magic file + grep -v -F -- "$SECRET_TAG " "$MAGIC_FILE" > "$MAGIC_FILE.new" mv "$MAGIC_FILE.new" "$MAGIC_FILE" fi } |