diff options
author | Julius Goryavsky <julius.goryavsky@mariadb.com> | 2021-05-10 04:27:16 +0200 |
---|---|---|
committer | Julius Goryavsky <julius.goryavsky@mariadb.com> | 2021-05-11 04:07:04 +0200 |
commit | 3e205cacce5fbc0a5222a7d6c27baddf1eae370d (patch) | |
tree | 40691954b04fc20c68aab6600d5205da3a13806a /scripts/wsrep_sst_rsync.sh | |
parent | f8665314d4ba190679001b81bb7d9fd7a38fc0f6 (diff) | |
download | mariadb-git-10.6-MDEV-23580.tar.gz |
MDEV-23580: WSREP_SST: [ERROR] rsync daemon port has been taken10.6-MDEV-23580
This commit contains a large set of further bug fixes and
improvements to SST scripts for Galera, continuing the work
that was started in MDEV-24962 to make SST scripts work smoothly
in different network configurations (especially using ipv6) and
with different environment settings:
1) The ipv6 addresses were incorrectly handled in the SST script
for rsync (incorrect address substitution for establishing a
connection, incorrect address substitution for bind, and so on);
2) Checking the locality of the ip-address in SST scripts did not
support ipv6 addresses (such as "[::1]"), which were falsely
identified as non-local ip, which further did not allow running
two SSTs on different local addresses on the same machine.
On the other hand, this bug masked some other errors (related
to handling ipv6 addresses);
3) The code for checking the locality of the ip address was different
in the SST scripts for rsync and for mysqldump, with individual
flaws. This code is now made common and moved to wsrep_sst_common;
4) Waiting for the start of the transport channel (socat, nc, rsync,
stunnel) in the wait_for_listen() and check_pid_and_port() functions
did not process ipv6 addresses correctly in all cases (not for all
branches);
5) Waiting for the start of the transport channel (socat, nc, rsync,
stunnel) in the wait_for_listen() and check_pid_and_port() functions
for some code branches could give a false positive result due to
the textual match of prefixes in the port number and/or PID of
the process;
6) Waiting for the start of the transport channel (socat, nc, rsync,
stunnel) was supported through different utilities in SST scripts
for mariabackup and for rsync, and with various minor flaws in
the code. Now the code is still different in these scripts, but
it supports a common set of utilities (lsof, ss, sockstat) and
is synchronized across patterns that used to check the output
of these utilities;
7) In SST via mariabackup, the signal about readiness to receive data
is sometimes sent too early - immediately after listen(), and not
after accept() (which are called by socat or netcat utility).
8) Checking availability of the some options of some utilities was
done using the grep pattern, which easily gives false positives;
9) Common name (CN) for local addresses, if not explicitly specified,
is now always replaced to "localhost" to avoid the need to generate
many separate certificates for local addresses of one machine and
not to depend on which the local address is currently used in test
(ipv4 or ipv6, etc.);
10) In tests galera_sst_mariabackup_encrypt_with_key_server and
galera_sst_rsync_encrypt_with_key_server the correct certificate
is selected to avoid commonname (CN) mismatch problems;
11) Further refactoring to protect against spaces in file names.
12) Further general refactoring to eliminate bash-specific constructs
or to improve code readability;
13) The code for setting options for the nc (netcat) utility was
different in different scripts for SST - now it is made identical.
14) Fixed long-time broken encryption via xbcrypt in combination with
mariabackup and added support for key-based encryption via openssl
utility, which is now enabled by default for encrypt=1 mode (this
default mode can be changed using a new configuration file option
"encypt-format=openssl|xbcrypt", which can be placed in the [mysqld],
[sst] or in the [xtrabackup] section) - this change will allow us
to use and to test the encypt=1 encryption without installing
non-standard third-party utilities.
Diffstat (limited to 'scripts/wsrep_sst_rsync.sh')
-rw-r--r-- | scripts/wsrep_sst_rsync.sh | 178 |
1 files changed, 93 insertions, 85 deletions
diff --git a/scripts/wsrep_sst_rsync.sh b/scripts/wsrep_sst_rsync.sh index 92fdc28f643..70e4a3326a1 100644 --- a/scripts/wsrep_sst_rsync.sh +++ b/scripts/wsrep_sst_rsync.sh @@ -23,13 +23,13 @@ RSYNC_PID= # rsync pid file RSYNC_CONF= # rsync configuration file RSYNC_REAL_PID= # rsync process id -OS=$(uname) +OS="$(uname)" [ "$OS" = 'Darwin' ] && export -n LD_LIBRARY_PATH # Setting the path for lsof on CentOS export PATH="/usr/sbin:/sbin:$PATH" -. $(dirname $0)/wsrep_sst_common +. $(dirname "$0")/wsrep_sst_common wsrep_check_datadir wsrep_check_programs rsync @@ -48,7 +48,7 @@ cleanup_joiner() rm -rf "$MAGIC_FILE" rm -rf "$RSYNC_PID" wsrep_log_info "Joiner cleanup done." - if [ "${WSREP_SST_OPT_ROLE}" = "joiner" ];then + if [ "$WSREP_SST_OPT_ROLE" = 'joiner' ]; then wsrep_cleanup_progress_file fi } @@ -57,68 +57,71 @@ cleanup_joiner() check_pid() { local pid_file="$1" - [ -r "$pid_file" ] && ps -p $(cat "$pid_file") >/dev/null 2>&1 + [ -r "$pid_file" ] && ps -p $(cat "$pid_file") 2>&1 >/dev/null } check_pid_and_port() { local pid_file="$1" local rsync_pid=$2 - local rsync_addr=$3 - local rsync_port=$4 - - case $OS in - FreeBSD) - local port_info="$(sockstat -46lp ${rsync_port} 2>/dev/null | \ - grep ":${rsync_port}")" - local is_rsync="$(echo $port_info | \ - grep -E '[[:space:]]+(rsync|stunnel)[[:space:]]+'"$rsync_pid" 2>/dev/null)" - ;; - *) - if [ ! -x "$(command -v lsof)" ]; then - wsrep_log_error "lsof tool not found in PATH! Make sure you have it installed." - exit 2 # ENOENT - fi - local port_info="$(lsof -i :$rsync_port -Pn 2>/dev/null | \ - grep "(LISTEN)")" - local is_rsync="$(echo $port_info | \ - grep -E '^(rsync|stunnel)[[:space:]]+'"$rsync_pid" 2>/dev/null)" - ;; - esac + local rsync_addr="$3" + local rsync_port="$4" + + if [ -z "$rsync_port" -o -z "$rsync_addr" -o -z "$rsync_pid" ]; then + wsrep_log_error "check_pid_and_port(): bad arguments" + exit 2 # ENOENT + fi - local is_listening_all="$(echo $port_info | \ - grep "*:$rsync_port" 2>/dev/null)" - local is_listening_addr="$(echo $port_info | \ - grep -F "$rsync_addr:$rsync_port" 2>/dev/null)" + local port_info is_rsync + + if [ $lsof_available -ne 0 ]; then + port_info=$(lsof -i ":$rsync_port" -Pn 2>/dev/null | \ + grep -F '(LISTEN)') + is_rsync=$(echo "$port_info" | \ + grep -E "^(rsync|stunnel)[^[:space:]]*[[:space:]]+$rsync_pid[[:space:]]+") + elif [ $sockstat_available -ne 0 ]; then + port_info=$(sockstat -p "$rsync_port" 2>/dev/null | \ + grep -F 'LISTEN') + is_rsync=$(echo "$port_info" | \ + grep -E "[[:space:]]+(rsync|stunnel)[^[:space:]]*[[:space:]]+$rsync_pid[[:space:]]+") + elif [ $ss_available -ne 0 ]; then + port_info=$(ss -H -p -n -l "( sport = :$rsync_port )" 2>/dev/null) + is_rsync=$(echo "$port_info" | \ + grep -E "users:\\(.*\\(\"(rsync|stunnel)[^[:space:]]*\".*\<pid=$rsync_pid\>.*\\)") + else + wsrep_log_error "unknown sockets utility" + exit 2 # ENOENT + fi - if [ ! -z "$is_listening_all" -o ! -z "$is_listening_addr" ]; then - if [ -z "$is_rsync" ]; then - wsrep_log_error "rsync daemon port '$rsync_port' has been taken" + if [ -z "$is_rsync" ]; then + local is_listening_all + if [ $lsof_available -ne 0 ]; then + is_listening_all=$(echo "$port_info" | \ + grep -E "[[:space:]](\\*|\\[?::\\]?):$rsync_port[[:space:]]") + else + if [ $sockstat_available -eq 0 ]; then + port_info=$(echo "$port_info" | grep -q -F 'users:(') + fi + port_info=$(echo "$port_info" | \ + grep -E "[^[:space:]]+[[:space:]]+[^[:space:]]+[[:space:]]+[^[:space:]]+[[:space:]]+[^[:space:]]+[[:space:]]+[^[:space:]]+" -o) + is_listening_all=$(echo "$port_info" | \ + grep -E "[[:space:]](\\*|\\[?::\\]?):$rsync_port\$") + fi + local is_listening_addr=$(echo "$port_info" | \ + grep -w -F -- "$rsync_addr:$rsync_port") + if [ -z "$is_listening_addr" ]; then + is_listening_addr=$(echo "$port_info" | \ + grep -w -F "[$rsync_addr]:$rsync_port") + fi + if [ -n "$is_listening_all" -o -n "$is_listening_addr" ]; then + wsrep_log_error "rsync or stunnel daemon port '$rsync_port' " \ + "has been taken by another program" exit 16 # EBUSY fi + return 1 fi - check_pid "$pid_file" && \ - [ -n "$port_info" ] && [ -n "$is_rsync" ] && \ - [ $(cat "$pid_file") -eq $rsync_pid ] -} -is_local_ip() -{ - local address="$1" - local get_addr_bin="$(command -v ifconfig)" - if [ -z "$get_addr_bin" ] - then - get_addr_bin="$(command -v ip) address show" - # Add an slash at the end, so we don't get false positive : 172.18.0.4 matches 172.18.0.41 - # ip output format is "X.X.X.X/mask" - address="$address/" - else - # Add an space at the end, so we don't get false positive : 172.18.0.4 matches 172.18.0.41 - # ifconfig output format is "X.X.X.X " - address="$address " - fi - - $get_addr_bin | grep -F "$address" > /dev/null + check_pid "$pid_file" && [ $(cat "$pid_file") -eq $rsync_pid ] } STUNNEL_CONF="$WSREP_SST_OPT_DATA/stunnel.conf" @@ -225,11 +228,11 @@ check_server_ssl_config() SSLMODE=$(parse_cnf 'sst' 'ssl-mode' | tr [:lower:] [:upper:]) -if [ -z "$SSTKEY" -a -z "$SSTCERT" ] +if [ -z "$SSTKEY" -a -z "$SSTCERT" -a -z "$SSTCA" ] then # no old-style SSL config in [sst], check for new one check_server_ssl_config 'sst' - if [ -z "$SSTKEY" -a -z "$SSTCERT" ]; then + if [ -z "$SSTKEY" -a -z "$SSTCERT" -a -z "$SSTCA" ]; then check_server_ssl_config '--mysqld' fi fi @@ -279,7 +282,7 @@ fi STUNNEL="" if [ -n "$SSLMODE" -a "$SSLMODE" != 'DISABLED' ] && wsrep_check_programs stunnel then - wsrep_log_info "Using stunnel for SSL encryption: CAfile: $SSTCA, SSLMODE: $SSLMODE" + wsrep_log_info "Using stunnel for SSL encryption: CAfile: '$SSTCA', SSLMODE: '$SSLMODE'" STUNNEL="stunnel $STUNNEL_CONF" fi @@ -296,7 +299,7 @@ foreground = yes pid = $STUNNEL_PID debug = warning client = yes -connect = ${WSREP_SST_OPT_ADDR%/*} +connect = $WSREP_SST_OPT_HOST_UNESCAPED:$WSREP_SST_OPT_PORT TIMEOUTclose = 0 ${VERIFY_OPT} EOF @@ -322,7 +325,7 @@ EOF # (b) Cluster state ID & wsrep_gtid_domain_id to be written to the file, OR # (c) ERROR file, in case flush tables operation failed. - while [ ! -r "$FLUSHED" ] && ! grep -q ':' "$FLUSHED" >/dev/null 2>&1 + while [ ! -r "$FLUSHED" ] && ! grep -q -F ':' "$FLUSHED" >/dev/null 2>&1 do # Check whether ERROR file exists. if [ -f "$ERROR" ] @@ -365,15 +368,14 @@ EOF # first, the normal directories, so that we can detect incompatible protocol RC=0 - eval rsync ${STUNNEL:+--rsh=\"$STUNNEL\"} \ + eval rsync "'${STUNNEL:+--rsh=$STUNNEL}'" \ --owner --group --perms --links --specials \ --ignore-times --inplace --dirs --delete --quiet \ - $WHOLE_FILE_OPT ${FILTER} "$WSREP_SST_OPT_DATA/" \ - rsync://$WSREP_SST_OPT_ADDR >&2 || RC=$? + $WHOLE_FILE_OPT $FILTER "'$WSREP_SST_OPT_DATA/'" \ + "'rsync://$WSREP_SST_OPT_ADDR'" >&2 || RC=$? if [ $RC -ne 0 ]; then wsrep_log_error "rsync returned code $RC:" - case $RC in 12) RC=71 # EPROTO wsrep_log_error \ @@ -394,7 +396,7 @@ EOF --ignore-times --inplace --dirs --delete --quiet \ $WHOLE_FILE_OPT -f '+ /ibdata*' -f '+ /ib_lru_dump' \ -f '- **' "$INNODB_DATA_HOME_DIR/" \ - rsync://$WSREP_SST_OPT_ADDR-data_dir >&2 || RC=$? + "rsync://$WSREP_SST_OPT_ADDR-data_dir" >&2 || RC=$? if [ $RC -ne 0 ]; then wsrep_log_error "rsync innodb_data_home_dir returned code $RC:" @@ -405,28 +407,32 @@ EOF rsync ${STUNNEL:+--rsh="$STUNNEL"} \ --owner --group --perms --links --specials \ --ignore-times --inplace --dirs --delete --quiet \ - $WHOLE_FILE_OPT -f '+ /ib_logfile[0-9]*' -f '+ /aria_log.*' -f '+ /aria_log_control' -f '- **' "$WSREP_LOG_DIR/" \ - rsync://$WSREP_SST_OPT_ADDR-log_dir >&2 || RC=$? + $WHOLE_FILE_OPT -f '+ /ib_logfile[0-9]*' -f '+ /aria_log.*' \ + -f '+ /aria_log_control' -f '- **' "$WSREP_LOG_DIR/" \ + "rsync://$WSREP_SST_OPT_ADDR-log_dir" >&2 || RC=$? if [ $RC -ne 0 ]; then wsrep_log_error "rsync innodb_log_group_home_dir returned code $RC:" exit 255 # unknown error fi - # then, we parallelize the transfer of database directories, use . so that pathconcatenation works + # then, we parallelize the transfer of database directories, + # use . so that path concatenation works: + cd "$WSREP_SST_OPT_DATA" count=1 - [ "$OS" = "Linux" ] && count=$(grep -c processor /proc/cpuinfo) - [ "$OS" = "Darwin" -o "$OS" = "FreeBSD" ] && count=$(sysctl -n hw.ncpu) + [ "$OS" = 'Linux' ] && count=$(grep -c processor /proc/cpuinfo) + [ "$OS" = 'Darwin' -o "$OS" = 'FreeBSD' ] && count=$(sysctl -n hw.ncpu) - find . -maxdepth 1 -mindepth 1 -type d -not -name "lost+found" -not -name ".zfs" \ - -print0 | xargs -I{} -0 -P $count \ + find . -maxdepth 1 -mindepth 1 -type d -not -name 'lost+found' \ + -not -name '.zfs' -print0 | xargs -I{} -0 -P $count \ rsync ${STUNNEL:+--rsh="$STUNNEL"} \ --owner --group --perms --links --specials \ --ignore-times --inplace --recursive --delete --quiet \ - $WHOLE_FILE_OPT --exclude '*/ib_logfile*' --exclude "*/aria_log.*" --exclude "*/aria_log_control" "$WSREP_SST_OPT_DATA"/{}/ \ - rsync://$WSREP_SST_OPT_ADDR/{} >&2 || RC=$? + $WHOLE_FILE_OPT --exclude '*/ib_logfile*' --exclude '*/aria_log.*' \ + --exclude '*/aria_log_control' "$WSREP_SST_OPT_DATA/{}/" \ + "rsync://$WSREP_SST_OPT_ADDR/{}" >&2 || RC=$? cd "$OLD_PWD" @@ -455,13 +461,13 @@ EOF fi rsync ${STUNNEL:+--rsh="$STUNNEL"} \ - --archive --quiet --checksum "$MAGIC_FILE" rsync://$WSREP_SST_OPT_ADDR + --archive --quiet --checksum "$MAGIC_FILE" "rsync://$WSREP_SST_OPT_ADDR" echo "done $STATE" elif [ "$WSREP_SST_OPT_ROLE" = 'joiner' ] then - wsrep_check_programs lsof + check_sockets_utils touch "$SST_PROGRESS_FILE" MYSQLD_PID="$WSREP_SST_OPT_PARENT" @@ -488,6 +494,7 @@ then ADDR="$WSREP_SST_OPT_ADDR" RSYNC_PORT="$WSREP_SST_OPT_PORT" RSYNC_ADDR="$WSREP_SST_OPT_HOST" + RSYNC_ADDR_UNESCAPED="$WSREP_SST_OPT_HOST_UNESCAPED" trap "exit 32" HUP PIPE trap "exit 3" INT TERM ABRT @@ -519,10 +526,10 @@ EOF # rm -rf "$DATA"/ib_logfile* # we don't want old logs around # If the IP is local listen only in it - if is_local_ip "$RSYNC_ADDR" + if is_local_ip "$RSYNC_ADDR_UNESCAPED" then - RSYNC_EXTRA_ARGS="--address $RSYNC_ADDR" - STUNNEL_ACCEPT="$RSYNC_ADDR:$RSYNC_PORT" + RSYNC_EXTRA_ARGS="--address $RSYNC_ADDR_UNESCAPED" + STUNNEL_ACCEPT="$RSYNC_ADDR_UNESCAPED:$RSYNC_PORT" else # Not local, possibly a NAT, listen on all interfaces RSYNC_EXTRA_ARGS="" @@ -533,7 +540,7 @@ EOF if [ -z "$STUNNEL" ] then - rsync --daemon --no-detach --port "$RSYNC_PORT" --config "$RSYNC_CONF" ${RSYNC_EXTRA_ARGS} & + rsync --daemon --no-detach --port "$RSYNC_PORT" --config "$RSYNC_CONF" $RSYNC_EXTRA_ARGS & RSYNC_REAL_PID=$! else cat << EOF > "$STUNNEL_CONF" @@ -543,18 +550,19 @@ ${CAFILE_OPT} foreground = yes pid = $STUNNEL_PID debug = warning +debug = 6 client = no [rsync] accept = $STUNNEL_ACCEPT exec = $(command -v rsync) -execargs = rsync --server --daemon --config='$RSYNC_CONF' . +execargs = rsync --server --daemon --config=$RSYNC_CONF . EOF stunnel "$STUNNEL_CONF" & RSYNC_REAL_PID=$! RSYNC_PID="$STUNNEL_PID" fi - until check_pid_and_port "$RSYNC_PID" "$RSYNC_REAL_PID" "$RSYNC_ADDR" "$RSYNC_PORT" + until check_pid_and_port "$RSYNC_PID" "$RSYNC_REAL_PID" "$RSYNC_ADDR_UNESCAPED" "$RSYNC_PORT" do sleep 0.2 done @@ -571,10 +579,10 @@ EOF exit 42 fi CN=$("$OPENSSL_BINARY" x509 -noout -subject -in "$SSTCERT" | \ - tr "," "\n" | grep "CN =" | cut -d= -f2 | sed s/^\ // | \ + tr "," "\n" | grep -F 'CN =' | cut -d= -f2 | sed s/^\ // | \ sed s/\ %//) fi - MY_SECRET=$(wsrep_gen_secret) + MY_SECRET="$(wsrep_gen_secret)" # Add authentication data to address ADDR="$CN:$MY_SECRET@$WSREP_SST_OPT_HOST" else @@ -624,7 +632,7 @@ EOF if [ -r "$MAGIC_FILE" ] then # check donor supplied secret - SECRET=$(grep "$SECRET_TAG " "$MAGIC_FILE" 2>/dev/null | cut -d ' ' -f 2) + SECRET=$(grep -F -- "$SECRET_TAG " "$MAGIC_FILE" 2>/dev/null | cut -d ' ' -f 2) if [ "$SECRET" != "$MY_SECRET" ]; then wsrep_log_error "Donor does not know my secret!" wsrep_log_info "Donor:'$SECRET', my:'$MY_SECRET'" @@ -632,7 +640,7 @@ EOF fi # remove secret from magic file - grep -v "$SECRET_TAG " "$MAGIC_FILE" > "$MAGIC_FILE.new" + grep -v -F -- "$SECRET_TAG " "$MAGIC_FILE" > "$MAGIC_FILE.new" mv "$MAGIC_FILE.new" "$MAGIC_FILE" # UUID:seqno & wsrep_gtid_domain_id is received here. @@ -643,7 +651,7 @@ EOF fi wsrep_cleanup_progress_file -# cleanup_joiner +# cleanup_joiner else wsrep_log_error "Unrecognized role: '$WSREP_SST_OPT_ROLE'" exit 22 # EINVAL |