summary | refs | log | tree | commit | diff
path: root/scripts/wsrep_sst_rsync.sh
diff options
context:
space:
mode:
author: Julius Goryavsky <julius.goryavsky@mariadb.com> 2021-05-10 04:27:16 +0200
committer: Julius Goryavsky <julius.goryavsky@mariadb.com> 2021-05-11 04:07:04 +0200
commit: 3e205cacce5fbc0a5222a7d6c27baddf1eae370d (patch)
tree: 40691954b04fc20c68aab6600d5205da3a13806a /scripts/wsrep_sst_rsync.sh
parent: f8665314d4ba190679001b81bb7d9fd7a38fc0f6 (diff)
download: mariadb-git-10.6-MDEV-23580.tar.gz
MDEV-23580: WSREP_SST: [ERROR] rsync daemon port has been taken (branch: 10.6-MDEV-23580)
This commit contains a large set of further bug fixes and improvements to SST scripts for Galera, continuing the work that was started in MDEV-24962 to make SST scripts work smoothly in different network configurations (especially using ipv6) and with different environment settings: 1) The ipv6 addresses were incorrectly handled in the SST script for rsync (incorrect address substitution for establishing a connection, incorrect address substitution for bind, and so on); 2) Checking the locality of the ip-address in SST scripts did not support ipv6 addresses (such as "[::1]"), which were falsely identified as non-local ip, which further did not allow running two SSTs on different local addresses on the same machine. On the other hand, this bug masked some other errors (related to handling ipv6 addresses); 3) The code for checking the locality of the ip address was different in the SST scripts for rsync and for mysqldump, with individual flaws. This code is now made common and moved to wsrep_sst_common; 4) Waiting for the start of the transport channel (socat, nc, rsync, stunnel) in the wait_for_listen() and check_pid_and_port() functions did not process ipv6 addresses correctly in all cases (not for all branches); 5) Waiting for the start of the transport channel (socat, nc, rsync, stunnel) in the wait_for_listen() and check_pid_and_port() functions for some code branches could give a false positive result due to the textual match of prefixes in the port number and/or PID of the process; 6) Waiting for the start of the transport channel (socat, nc, rsync, stunnel) was supported through different utilities in SST scripts for mariabackup and for rsync, and with various minor flaws in the code. 
Now the code is still different in these scripts, but it supports a common set of utilities (lsof, ss, sockstat) and is synchronized across the patterns that are used to check the output of these utilities; 7) In SST via mariabackup, the signal about readiness to receive data is sometimes sent too early - immediately after listen(), and not after accept() (which are called by the socat or netcat utility). 8) Checking the availability of some options of some utilities was done using a grep pattern, which easily gives false positives; 9) Common name (CN) for local addresses, if not explicitly specified, is now always replaced with "localhost" to avoid the need to generate many separate certificates for local addresses of one machine and not to depend on which local address is currently used in the test (ipv4 or ipv6, etc.); 10) In tests galera_sst_mariabackup_encrypt_with_key_server and galera_sst_rsync_encrypt_with_key_server the correct certificate is selected to avoid common name (CN) mismatch problems; 11) Further refactoring to protect against spaces in file names. 12) Further general refactoring to eliminate bash-specific constructs or to improve code readability; 13) The code for setting options for the nc (netcat) utility was different in different scripts for SST - now it is made identical. 14) Fixed long-time broken encryption via xbcrypt in combination with mariabackup and added support for key-based encryption via openssl utility, which is now enabled by default for encrypt=1 mode (this default mode can be changed using a new configuration file option "encrypt-format=openssl|xbcrypt", which can be placed in the [mysqld], [sst] or in the [xtrabackup] section) - this change will allow us to use and to test the encrypt=1 encryption without installing non-standard third-party utilities.
Diffstat (limited to 'scripts/wsrep_sst_rsync.sh')
-rw-r--r-- scripts/wsrep_sst_rsync.sh 178
1 files changed, 93 insertions, 85 deletions
diff --git a/scripts/wsrep_sst_rsync.sh b/scripts/wsrep_sst_rsync.sh
index 92fdc28f643..70e4a3326a1 100644
--- a/scripts/wsrep_sst_rsync.sh
+++ b/scripts/wsrep_sst_rsync.sh
@@ -23,13 +23,13 @@ RSYNC_PID= # rsync pid file
RSYNC_CONF= # rsync configuration file
RSYNC_REAL_PID= # rsync process id
-OS=$(uname)
+OS="$(uname)"
[ "$OS" = 'Darwin' ] && export -n LD_LIBRARY_PATH
# Setting the path for lsof on CentOS
export PATH="/usr/sbin:/sbin:$PATH"
-. $(dirname $0)/wsrep_sst_common
+. $(dirname "$0")/wsrep_sst_common
wsrep_check_datadir
wsrep_check_programs rsync
@@ -48,7 +48,7 @@ cleanup_joiner()
rm -rf "$MAGIC_FILE"
rm -rf "$RSYNC_PID"
wsrep_log_info "Joiner cleanup done."
- if [ "${WSREP_SST_OPT_ROLE}" = "joiner" ];then
+ if [ "$WSREP_SST_OPT_ROLE" = 'joiner' ]; then
wsrep_cleanup_progress_file
fi
}
@@ -57,68 +57,71 @@ cleanup_joiner()
check_pid()
{
local pid_file="$1"
- [ -r "$pid_file" ] && ps -p $(cat "$pid_file") >/dev/null 2>&1
+ [ -r "$pid_file" ] && ps -p $(cat "$pid_file") 2>&1 >/dev/null
}
check_pid_and_port()
{
local pid_file="$1"
local rsync_pid=$2
- local rsync_addr=$3
- local rsync_port=$4
-
- case $OS in
- FreeBSD)
- local port_info="$(sockstat -46lp ${rsync_port} 2>/dev/null | \
- grep ":${rsync_port}")"
- local is_rsync="$(echo $port_info | \
- grep -E '[[:space:]]+(rsync|stunnel)[[:space:]]+'"$rsync_pid" 2>/dev/null)"
- ;;
- *)
- if [ ! -x "$(command -v lsof)" ]; then
- wsrep_log_error "lsof tool not found in PATH! Make sure you have it installed."
- exit 2 # ENOENT
- fi
- local port_info="$(lsof -i :$rsync_port -Pn 2>/dev/null | \
- grep "(LISTEN)")"
- local is_rsync="$(echo $port_info | \
- grep -E '^(rsync|stunnel)[[:space:]]+'"$rsync_pid" 2>/dev/null)"
- ;;
- esac
+ local rsync_addr="$3"
+ local rsync_port="$4"
+
+ if [ -z "$rsync_port" -o -z "$rsync_addr" -o -z "$rsync_pid" ]; then
+ wsrep_log_error "check_pid_and_port(): bad arguments"
+ exit 2 # ENOENT
+ fi
- local is_listening_all="$(echo $port_info | \
- grep "*:$rsync_port" 2>/dev/null)"
- local is_listening_addr="$(echo $port_info | \
- grep -F "$rsync_addr:$rsync_port" 2>/dev/null)"
+ local port_info is_rsync
+
+ if [ $lsof_available -ne 0 ]; then
+ port_info=$(lsof -i ":$rsync_port" -Pn 2>/dev/null | \
+ grep -F '(LISTEN)')
+ is_rsync=$(echo "$port_info" | \
+ grep -E "^(rsync|stunnel)[^[:space:]]*[[:space:]]+$rsync_pid[[:space:]]+")
+ elif [ $sockstat_available -ne 0 ]; then
+ port_info=$(sockstat -p "$rsync_port" 2>/dev/null | \
+ grep -F 'LISTEN')
+ is_rsync=$(echo "$port_info" | \
+ grep -E "[[:space:]]+(rsync|stunnel)[^[:space:]]*[[:space:]]+$rsync_pid[[:space:]]+")
+ elif [ $ss_available -ne 0 ]; then
+ port_info=$(ss -H -p -n -l "( sport = :$rsync_port )" 2>/dev/null)
+ is_rsync=$(echo "$port_info" | \
+ grep -E "users:\\(.*\\(\"(rsync|stunnel)[^[:space:]]*\".*\<pid=$rsync_pid\>.*\\)")
+ else
+ wsrep_log_error "unknown sockets utility"
+ exit 2 # ENOENT
+ fi
- if [ ! -z "$is_listening_all" -o ! -z "$is_listening_addr" ]; then
- if [ -z "$is_rsync" ]; then
- wsrep_log_error "rsync daemon port '$rsync_port' has been taken"
+ if [ -z "$is_rsync" ]; then
+ local is_listening_all
+ if [ $lsof_available -ne 0 ]; then
+ is_listening_all=$(echo "$port_info" | \
+ grep -E "[[:space:]](\\*|\\[?::\\]?):$rsync_port[[:space:]]")
+ else
+ if [ $sockstat_available -eq 0 ]; then
+ port_info=$(echo "$port_info" | grep -q -F 'users:(')
+ fi
+ port_info=$(echo "$port_info" | \
+ grep -E "[^[:space:]]+[[:space:]]+[^[:space:]]+[[:space:]]+[^[:space:]]+[[:space:]]+[^[:space:]]+[[:space:]]+[^[:space:]]+" -o)
+ is_listening_all=$(echo "$port_info" | \
+ grep -E "[[:space:]](\\*|\\[?::\\]?):$rsync_port\$")
+ fi
+ local is_listening_addr=$(echo "$port_info" | \
+ grep -w -F -- "$rsync_addr:$rsync_port")
+ if [ -z "$is_listening_addr" ]; then
+ is_listening_addr=$(echo "$port_info" | \
+ grep -w -F "[$rsync_addr]:$rsync_port")
+ fi
+ if [ -n "$is_listening_all" -o -n "$is_listening_addr" ]; then
+ wsrep_log_error "rsync or stunnel daemon port '$rsync_port' " \
+ "has been taken by another program"
exit 16 # EBUSY
fi
+ return 1
fi
- check_pid "$pid_file" && \
- [ -n "$port_info" ] && [ -n "$is_rsync" ] && \
- [ $(cat "$pid_file") -eq $rsync_pid ]
-}
-is_local_ip()
-{
- local address="$1"
- local get_addr_bin="$(command -v ifconfig)"
- if [ -z "$get_addr_bin" ]
- then
- get_addr_bin="$(command -v ip) address show"
- # Add an slash at the end, so we don't get false positive : 172.18.0.4 matches 172.18.0.41
- # ip output format is "X.X.X.X/mask"
- address="$address/"
- else
- # Add an space at the end, so we don't get false positive : 172.18.0.4 matches 172.18.0.41
- # ifconfig output format is "X.X.X.X "
- address="$address "
- fi
-
- $get_addr_bin | grep -F "$address" > /dev/null
+ check_pid "$pid_file" && [ $(cat "$pid_file") -eq $rsync_pid ]
}
STUNNEL_CONF="$WSREP_SST_OPT_DATA/stunnel.conf"
@@ -225,11 +228,11 @@ check_server_ssl_config()
SSLMODE=$(parse_cnf 'sst' 'ssl-mode' | tr [:lower:] [:upper:])
-if [ -z "$SSTKEY" -a -z "$SSTCERT" ]
+if [ -z "$SSTKEY" -a -z "$SSTCERT" -a -z "$SSTCA" ]
then
# no old-style SSL config in [sst], check for new one
check_server_ssl_config 'sst'
- if [ -z "$SSTKEY" -a -z "$SSTCERT" ]; then
+ if [ -z "$SSTKEY" -a -z "$SSTCERT" -a -z "$SSTCA" ]; then
check_server_ssl_config '--mysqld'
fi
fi
@@ -279,7 +282,7 @@ fi
STUNNEL=""
if [ -n "$SSLMODE" -a "$SSLMODE" != 'DISABLED' ] && wsrep_check_programs stunnel
then
- wsrep_log_info "Using stunnel for SSL encryption: CAfile: $SSTCA, SSLMODE: $SSLMODE"
+ wsrep_log_info "Using stunnel for SSL encryption: CAfile: '$SSTCA', SSLMODE: '$SSLMODE'"
STUNNEL="stunnel $STUNNEL_CONF"
fi
@@ -296,7 +299,7 @@ foreground = yes
pid = $STUNNEL_PID
debug = warning
client = yes
-connect = ${WSREP_SST_OPT_ADDR%/*}
+connect = $WSREP_SST_OPT_HOST_UNESCAPED:$WSREP_SST_OPT_PORT
TIMEOUTclose = 0
${VERIFY_OPT}
EOF
@@ -322,7 +325,7 @@ EOF
# (b) Cluster state ID & wsrep_gtid_domain_id to be written to the file, OR
# (c) ERROR file, in case flush tables operation failed.
- while [ ! -r "$FLUSHED" ] && ! grep -q ':' "$FLUSHED" >/dev/null 2>&1
+ while [ ! -r "$FLUSHED" ] && ! grep -q -F ':' "$FLUSHED" >/dev/null 2>&1
do
# Check whether ERROR file exists.
if [ -f "$ERROR" ]
@@ -365,15 +368,14 @@ EOF
# first, the normal directories, so that we can detect incompatible protocol
RC=0
- eval rsync ${STUNNEL:+--rsh=\"$STUNNEL\"} \
+ eval rsync "'${STUNNEL:+--rsh=$STUNNEL}'" \
--owner --group --perms --links --specials \
--ignore-times --inplace --dirs --delete --quiet \
- $WHOLE_FILE_OPT ${FILTER} "$WSREP_SST_OPT_DATA/" \
- rsync://$WSREP_SST_OPT_ADDR >&2 || RC=$?
+ $WHOLE_FILE_OPT $FILTER "'$WSREP_SST_OPT_DATA/'" \
+ "'rsync://$WSREP_SST_OPT_ADDR'" >&2 || RC=$?
if [ $RC -ne 0 ]; then
wsrep_log_error "rsync returned code $RC:"
-
case $RC in
12) RC=71 # EPROTO
wsrep_log_error \
@@ -394,7 +396,7 @@ EOF
--ignore-times --inplace --dirs --delete --quiet \
$WHOLE_FILE_OPT -f '+ /ibdata*' -f '+ /ib_lru_dump' \
-f '- **' "$INNODB_DATA_HOME_DIR/" \
- rsync://$WSREP_SST_OPT_ADDR-data_dir >&2 || RC=$?
+ "rsync://$WSREP_SST_OPT_ADDR-data_dir" >&2 || RC=$?
if [ $RC -ne 0 ]; then
wsrep_log_error "rsync innodb_data_home_dir returned code $RC:"
@@ -405,28 +407,32 @@ EOF
rsync ${STUNNEL:+--rsh="$STUNNEL"} \
--owner --group --perms --links --specials \
--ignore-times --inplace --dirs --delete --quiet \
- $WHOLE_FILE_OPT -f '+ /ib_logfile[0-9]*' -f '+ /aria_log.*' -f '+ /aria_log_control' -f '- **' "$WSREP_LOG_DIR/" \
- rsync://$WSREP_SST_OPT_ADDR-log_dir >&2 || RC=$?
+ $WHOLE_FILE_OPT -f '+ /ib_logfile[0-9]*' -f '+ /aria_log.*' \
+ -f '+ /aria_log_control' -f '- **' "$WSREP_LOG_DIR/" \
+ "rsync://$WSREP_SST_OPT_ADDR-log_dir" >&2 || RC=$?
if [ $RC -ne 0 ]; then
wsrep_log_error "rsync innodb_log_group_home_dir returned code $RC:"
exit 255 # unknown error
fi
- # then, we parallelize the transfer of database directories, use . so that pathconcatenation works
+ # then, we parallelize the transfer of database directories,
+ # use . so that path concatenation works:
+
cd "$WSREP_SST_OPT_DATA"
count=1
- [ "$OS" = "Linux" ] && count=$(grep -c processor /proc/cpuinfo)
- [ "$OS" = "Darwin" -o "$OS" = "FreeBSD" ] && count=$(sysctl -n hw.ncpu)
+ [ "$OS" = 'Linux' ] && count=$(grep -c processor /proc/cpuinfo)
+ [ "$OS" = 'Darwin' -o "$OS" = 'FreeBSD' ] && count=$(sysctl -n hw.ncpu)
- find . -maxdepth 1 -mindepth 1 -type d -not -name "lost+found" -not -name ".zfs" \
- -print0 | xargs -I{} -0 -P $count \
+ find . -maxdepth 1 -mindepth 1 -type d -not -name 'lost+found' \
+ -not -name '.zfs' -print0 | xargs -I{} -0 -P $count \
rsync ${STUNNEL:+--rsh="$STUNNEL"} \
--owner --group --perms --links --specials \
--ignore-times --inplace --recursive --delete --quiet \
- $WHOLE_FILE_OPT --exclude '*/ib_logfile*' --exclude "*/aria_log.*" --exclude "*/aria_log_control" "$WSREP_SST_OPT_DATA"/{}/ \
- rsync://$WSREP_SST_OPT_ADDR/{} >&2 || RC=$?
+ $WHOLE_FILE_OPT --exclude '*/ib_logfile*' --exclude '*/aria_log.*' \
+ --exclude '*/aria_log_control' "$WSREP_SST_OPT_DATA/{}/" \
+ "rsync://$WSREP_SST_OPT_ADDR/{}" >&2 || RC=$?
cd "$OLD_PWD"
@@ -455,13 +461,13 @@ EOF
fi
rsync ${STUNNEL:+--rsh="$STUNNEL"} \
- --archive --quiet --checksum "$MAGIC_FILE" rsync://$WSREP_SST_OPT_ADDR
+ --archive --quiet --checksum "$MAGIC_FILE" "rsync://$WSREP_SST_OPT_ADDR"
echo "done $STATE"
elif [ "$WSREP_SST_OPT_ROLE" = 'joiner' ]
then
- wsrep_check_programs lsof
+ check_sockets_utils
touch "$SST_PROGRESS_FILE"
MYSQLD_PID="$WSREP_SST_OPT_PARENT"
@@ -488,6 +494,7 @@ then
ADDR="$WSREP_SST_OPT_ADDR"
RSYNC_PORT="$WSREP_SST_OPT_PORT"
RSYNC_ADDR="$WSREP_SST_OPT_HOST"
+ RSYNC_ADDR_UNESCAPED="$WSREP_SST_OPT_HOST_UNESCAPED"
trap "exit 32" HUP PIPE
trap "exit 3" INT TERM ABRT
@@ -519,10 +526,10 @@ EOF
# rm -rf "$DATA"/ib_logfile* # we don't want old logs around
# If the IP is local listen only in it
- if is_local_ip "$RSYNC_ADDR"
+ if is_local_ip "$RSYNC_ADDR_UNESCAPED"
then
- RSYNC_EXTRA_ARGS="--address $RSYNC_ADDR"
- STUNNEL_ACCEPT="$RSYNC_ADDR:$RSYNC_PORT"
+ RSYNC_EXTRA_ARGS="--address $RSYNC_ADDR_UNESCAPED"
+ STUNNEL_ACCEPT="$RSYNC_ADDR_UNESCAPED:$RSYNC_PORT"
else
# Not local, possibly a NAT, listen on all interfaces
RSYNC_EXTRA_ARGS=""
@@ -533,7 +540,7 @@ EOF
if [ -z "$STUNNEL" ]
then
- rsync --daemon --no-detach --port "$RSYNC_PORT" --config "$RSYNC_CONF" ${RSYNC_EXTRA_ARGS} &
+ rsync --daemon --no-detach --port "$RSYNC_PORT" --config "$RSYNC_CONF" $RSYNC_EXTRA_ARGS &
RSYNC_REAL_PID=$!
else
cat << EOF > "$STUNNEL_CONF"
@@ -543,18 +550,19 @@ ${CAFILE_OPT}
foreground = yes
pid = $STUNNEL_PID
debug = warning
+debug = 6
client = no
[rsync]
accept = $STUNNEL_ACCEPT
exec = $(command -v rsync)
-execargs = rsync --server --daemon --config='$RSYNC_CONF' .
+execargs = rsync --server --daemon --config=$RSYNC_CONF .
EOF
stunnel "$STUNNEL_CONF" &
RSYNC_REAL_PID=$!
RSYNC_PID="$STUNNEL_PID"
fi
- until check_pid_and_port "$RSYNC_PID" "$RSYNC_REAL_PID" "$RSYNC_ADDR" "$RSYNC_PORT"
+ until check_pid_and_port "$RSYNC_PID" "$RSYNC_REAL_PID" "$RSYNC_ADDR_UNESCAPED" "$RSYNC_PORT"
do
sleep 0.2
done
@@ -571,10 +579,10 @@ EOF
exit 42
fi
CN=$("$OPENSSL_BINARY" x509 -noout -subject -in "$SSTCERT" | \
- tr "," "\n" | grep "CN =" | cut -d= -f2 | sed s/^\ // | \
+ tr "," "\n" | grep -F 'CN =' | cut -d= -f2 | sed s/^\ // | \
sed s/\ %//)
fi
- MY_SECRET=$(wsrep_gen_secret)
+ MY_SECRET="$(wsrep_gen_secret)"
# Add authentication data to address
ADDR="$CN:$MY_SECRET@$WSREP_SST_OPT_HOST"
else
@@ -624,7 +632,7 @@ EOF
if [ -r "$MAGIC_FILE" ]
then
# check donor supplied secret
- SECRET=$(grep "$SECRET_TAG " "$MAGIC_FILE" 2>/dev/null | cut -d ' ' -f 2)
+ SECRET=$(grep -F -- "$SECRET_TAG " "$MAGIC_FILE" 2>/dev/null | cut -d ' ' -f 2)
if [ "$SECRET" != "$MY_SECRET" ]; then
wsrep_log_error "Donor does not know my secret!"
wsrep_log_info "Donor:'$SECRET', my:'$MY_SECRET'"
@@ -632,7 +640,7 @@ EOF
fi
# remove secret from magic file
- grep -v "$SECRET_TAG " "$MAGIC_FILE" > "$MAGIC_FILE.new"
+ grep -v -F -- "$SECRET_TAG " "$MAGIC_FILE" > "$MAGIC_FILE.new"
mv "$MAGIC_FILE.new" "$MAGIC_FILE"
# UUID:seqno & wsrep_gtid_domain_id is received here.
@@ -643,7 +651,7 @@ EOF
fi
wsrep_cleanup_progress_file
-# cleanup_joiner
+# cleanup_joiner
else
wsrep_log_error "Unrecognized role: '$WSREP_SST_OPT_ROLE'"
exit 22 # EINVAL