summaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
author: Julius Goryavsky <julius.goryavsky@mariadb.com> 2021-06-15 06:24:38 +0200
committer: Julius Goryavsky <julius.goryavsky@mariadb.com> 2021-06-15 14:27:22 +0200
commit: 2edb8e12e10179b970007b3e1d5c465b9d0e110e (patch)
tree: 6e183202d77d8b2c370b06b89f9395578a457513 /scripts
parent: 18d5be5b54b1a05e6107a1c5828d9eed9cf18636 (diff)
download: mariadb-git-2edb8e12e10179b970007b3e1d5c465b9d0e110e.tar.gz
MDEV-25880 part 2: Improving reliability of the SST scripts
Additional changes aimed at improving the operational reliability of the SST scripts: 1) The script needs to give rsync and stunnel a short time to terminate after "kill -9" before the first PID check with the ps utility; 2) The temporary file used to create the binlog index could sometimes remain in the data directory if tar failed, and could then be reused without being cleaned up the next time SST was run - this is now fixed; 3) The temporary file used to build the binlog index is now created using mktemp and, if the tmpdir variable is present in the configuration file, it is created in that directory; 4) Checking the secret tag in SST via rsync is now faster and no longer requires creating a temporary file, which could remain in the data directory in case of failure; 5) Added the "-F" option to grep when checking the tag with mariabackup/xtrabackup-v2, to avoid possible collisions in case of special characters in the tag value (an unlikely scenario, but the new check is more reliable).
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/wsrep_sst_common.sh 7
-rw-r--r-- scripts/wsrep_sst_mariabackup.sh 6
-rw-r--r-- scripts/wsrep_sst_rsync.sh 64
-rw-r--r-- scripts/wsrep_sst_xtrabackup-v2.sh 6
4 files changed, 49 insertions, 34 deletions
diff --git a/scripts/wsrep_sst_common.sh b/scripts/wsrep_sst_common.sh
index c2f31b2818d..4dedecb439f 100644
--- a/scripts/wsrep_sst_common.sh
+++ b/scripts/wsrep_sst_common.sh
@@ -1223,7 +1223,7 @@ check_pid()
#
cleanup_pid()
{
- local pid="$1"
+ local pid=$1
local pid_file="${2:-}"
local config="${3:-}"
@@ -1241,8 +1241,9 @@ cleanup_pid()
round=8
force=1
kill -9 $pid >/dev/null 2>&1
+ sleep 0.5
else
- return 1;
+ return 1
fi
fi
done
@@ -1254,7 +1255,7 @@ cleanup_pid()
fi
[ -n "$pid_file" ] && [ -f "$pid_file" ] && rm -f "$pid_file"
- [ -n "$config" ] && [ -f "$config" ] && rm -f "$config"
+ [ -n "$config" ] && [ -f "$config" ] && rm -f "$config"
return 0
}
diff --git a/scripts/wsrep_sst_mariabackup.sh b/scripts/wsrep_sst_mariabackup.sh
index 7f97d9e8dea..339a8fcf4a5 100644
--- a/scripts/wsrep_sst_mariabackup.sh
+++ b/scripts/wsrep_sst_mariabackup.sh
@@ -741,15 +741,15 @@ recv_joiner()
fi
# check donor supplied secret
- SECRET=$(grep -- "$SECRET_TAG " "$MAGIC_FILE" 2>/dev/null | cut -d ' ' -f 2)
+ SECRET=$(grep -F -- "$SECRET_TAG " "$MAGIC_FILE" 2>/dev/null | cut -d ' ' -f 2)
if [ "$SECRET" != "$MY_SECRET" ]; then
wsrep_log_error "Donor does not know my secret!"
wsrep_log_info "Donor:'$SECRET', my:'$MY_SECRET'"
exit 32
fi
- # remove secret from magic file
- grep -v -- "$SECRET_TAG " "$MAGIC_FILE" > "$MAGIC_FILE.new"
+ # remove secret from the magic file
+ grep -v -F -- "$SECRET_TAG " "$MAGIC_FILE" > "$MAGIC_FILE.new"
mv "$MAGIC_FILE.new" "$MAGIC_FILE"
fi
}
diff --git a/scripts/wsrep_sst_rsync.sh b/scripts/wsrep_sst_rsync.sh
index a602af79af0..fc9f5017937 100644
--- a/scripts/wsrep_sst_rsync.sh
+++ b/scripts/wsrep_sst_rsync.sh
@@ -429,7 +429,7 @@ EOF
exit 255 # unknown error
fi
- # second, we transfer InnoDB log files
+ # second, we transfer InnoDB and Aria log files
rsync ${STUNNEL:+--rsh="$STUNNEL"} \
--owner --group --perms --links --specials \
--ignore-times --inplace --dirs --delete --quiet \
@@ -504,20 +504,20 @@ then
SST_PID="$WSREP_SST_OPT_DATA/wsrep_rsync_sst.pid"
- # give some time for lingering stunnel from previous SST to complete
+ # give some time for previous SST to complete:
check_round=0
while check_pid "$SST_PID" 0
do
- wsrep_log_info "previous SST not completed, waiting for it to exit"
+ wsrep_log_info "previous SST is not completed, waiting for it to exit"
check_round=$(( check_round + 1 ))
if [ $check_round -eq 10 ]; then
- wsrep_log_error "SST script already running."
+ wsrep_log_error "previous SST script still running."
exit 114 # EALREADY
fi
sleep 1
done
- # give some time for lingering stunnel from previous SST to complete
+ # give some time for stunnel from the previous SST to complete:
check_round=0
while check_pid "$STUNNEL_PID" 1
do
@@ -534,7 +534,7 @@ then
RSYNC_PID="$WSREP_SST_OPT_DATA/$MODULE.pid"
RSYNC_CONF="$WSREP_SST_OPT_DATA/$MODULE.conf"
- # give some time for lingering rsync from previous SST to complete
+ # give some time for rsync from the previous SST to complete:
check_round=0
while check_pid "$RSYNC_PID" 1
do
@@ -711,35 +711,49 @@ EOF
# Clean up old binlog files first
rm -f "$BINLOG_FILENAME".[0-9]*
- [ -f "$binlog_index" ] && rm "$binlog_index"
+ [ -f "$binlog_index" ] && rm -f "$binlog_index"
+
+ # Create a temporary file:
+ tmpdir=$(parse_cnf '--mysqld|sst' 'tmpdir')
+ if [ -z "$tmpdir" ]; then
+ tmpfile="$(mktemp)"
+ else
+ tmpfile=$(mktemp "--tmpdir=$tmpdir")
+ fi
wsrep_log_info "Extracting binlog files:"
- tar -xvf "$BINLOG_TAR_FILE" >> _binlog_tmp_files_$!
+ if ! tar -xvf "$BINLOG_TAR_FILE" > "$tmpfile"; then
+ wsrep_log_error "Error unpacking tar file with binlog files"
+ rm -f "$tmpfile"
+ exit 32
+ fi
+
+ # Rebuild binlog index:
while read bin_file; do
echo "$BINLOG_DIRNAME/$bin_file" >> "$binlog_index"
- done < _binlog_tmp_files_$!
- rm -f _binlog_tmp_files_$!
+ done < "$tmpfile"
+ rm -f "$tmpfile"
cd "$OLD_PWD"
fi
fi
- if [ -r "$MAGIC_FILE" ]
- then
- # check donor supplied secret
- SECRET=$(grep -F -- "$SECRET_TAG " "$MAGIC_FILE" 2>/dev/null | cut -d ' ' -f 2)
- if [ "$SECRET" != "$MY_SECRET" ]; then
- wsrep_log_error "Donor does not know my secret!"
- wsrep_log_info "Donor:'$SECRET', my:'$MY_SECRET'"
- exit 32
+ if [ -r "$MAGIC_FILE" ]; then
+ if [ -n "$MY_SECRET" ]; then
+ # check donor supplied secret
+ SECRET=$(grep -F -- "$SECRET_TAG " "$MAGIC_FILE" 2>/dev/null | cut -d ' ' -f 2)
+ if [ "$SECRET" != "$MY_SECRET" ]; then
+ wsrep_log_error "Donor does not know my secret!"
+ wsrep_log_info "Donor:'$SECRET', my:'$MY_SECRET'"
+ exit 32
+ fi
+ # remove secret from the magic file, and output
+ # the UUID:seqno & wsrep_gtid_domain_id:
+ grep -v -F -- "$SECRET_TAG " "$MAGIC_FILE"
+ else
+ # Output the UUID:seqno and wsrep_gtid_domain_id:
+ cat "$MAGIC_FILE"
fi
-
- # remove secret from magic file
- grep -v -F -- "$SECRET_TAG " "$MAGIC_FILE" > "$MAGIC_FILE.new"
-
- mv "$MAGIC_FILE.new" "$MAGIC_FILE"
- # UUID:seqno & wsrep_gtid_domain_id is received here.
- cat "$MAGIC_FILE" # Output : UUID:seqno wsrep_gtid_domain_id
else
# this message should cause joiner to abort
echo "rsync process ended without creating '$MAGIC_FILE'"
diff --git a/scripts/wsrep_sst_xtrabackup-v2.sh b/scripts/wsrep_sst_xtrabackup-v2.sh
index 24bff12219d..d76dc346a82 100644
--- a/scripts/wsrep_sst_xtrabackup-v2.sh
+++ b/scripts/wsrep_sst_xtrabackup-v2.sh
@@ -750,15 +750,15 @@ recv_joiner()
fi
# check donor supplied secret
- SECRET=$(grep -- "$SECRET_TAG " "$MAGIC_FILE" 2>/dev/null | cut -d ' ' -f 2)
+ SECRET=$(grep -F -- "$SECRET_TAG " "$MAGIC_FILE" 2>/dev/null | cut -d ' ' -f 2)
if [ "$SECRET" != "$MY_SECRET" ]; then
wsrep_log_error "Donor does not know my secret!"
wsrep_log_info "Donor:'$SECRET', my:'$MY_SECRET'"
exit 32
fi
- # remove secret from magic file
- grep -v -- "$SECRET_TAG " "$MAGIC_FILE" > "$MAGIC_FILE.new"
+ # remove secret from the magic file
+ grep -v -F -- "$SECRET_TAG " "$MAGIC_FILE" > "$MAGIC_FILE.new"
mv "$MAGIC_FILE.new" "$MAGIC_FILE"
fi
}