diff options
author | Alexey Yurchenko <ayurchen@gmail.com> | 2015-06-06 01:38:07 +0300 |
---|---|---|
committer | Nirbhay Choubey <nirbhay@mariadb.com> | 2015-09-09 17:03:23 -0400 |
commit | a7ea3ec34d7c542542dc11e324b1667fd9ba1a48 (patch) | |
tree | c0b4cff38f5c6185d89106d96d05bb6d93d16d16 /scripts/wsrep_sst_xtrabackup-v2.sh | |
parent | d78110e7fae1588afcb6bc5ea08be0d84ee18857 (diff) | |
download | mariadb-git-a7ea3ec34d7c542542dc11e324b1667fd9ba1a48.tar.gz |
Synced xtrabackup SST fixes from Percona tree (as of PXC 5.6.24-25.11 release). This fixes/adresses the following LP bugs:
- LP1380697: wsrep_sst_xtrabackup-v2 doesn't stop when mysql is SIGKILLed. (full fix for this (as engineeered by Percona) requires Linux-specific patch that we don't carry, but keep xtrabackup scripts as close as possible)
- LP1399134: Log the innobackupex/SST logs in SST to syslog if possible. (fixed)
- LP1405668: Race condition between donor and joiner in PXB SST. (fixed)
- LP1405985: Fail early if xtrabackup_checkkpoints is missing. (fixed)
- LP1407599: wsrep_sst_xtrabackup-v2 script causes innobackupex to print a false positive stack trace into the log. (fixed)
- LP1441762: IST Fails with SST script error. (fixed)
- LP1451670: Fail when move-back fails in xtrabackup SST. (fixed)
Diffstat (limited to 'scripts/wsrep_sst_xtrabackup-v2.sh')
-rw-r--r-- | scripts/wsrep_sst_xtrabackup-v2.sh | 304 |
1 files changed, 197 insertions, 107 deletions
diff --git a/scripts/wsrep_sst_xtrabackup-v2.sh b/scripts/wsrep_sst_xtrabackup-v2.sh index 5339fd721da..c7eb474c919 100644 --- a/scripts/wsrep_sst_xtrabackup-v2.sh +++ b/scripts/wsrep_sst_xtrabackup-v2.sh @@ -29,6 +29,8 @@ ekeyfile="" encrypt=0 nproc=1 ecode=0 +ssyslog="" +ssystag="" XTRABACKUP_PID="" SST_PORT="" REMOTEIP="" @@ -40,15 +42,15 @@ progress="" ttime=0 totime=0 lsn="" -incremental=0 ecmd="" rlimit="" # Initially stagemsg="${WSREP_SST_OPT_ROLE}" cpat="" -speciald=0 +speciald=1 ib_home_dir="" ib_log_dir="" +ib_undo_dir="" sfmt="tar" strmcmd="" @@ -70,6 +72,11 @@ xtmpdir="" scomp="" sdecomp="" +# Required for backup locks +# For backup locks it is 1 sent by joiner +# 5.6.21 PXC and later can't donate to an older joiner +sst_ver=1 + if which pv &>/dev/null && pv --help | grep -q FORMAT;then pvopts+=$pvformat fi @@ -168,7 +175,11 @@ get_transfer() fi wsrep_log_info "Using netcat as streamer" if [[ "$WSREP_SST_OPT_ROLE" == "joiner" ]];then - tcmd="nc -dl ${TSST_PORT}" + if nc -h | grep -q ncat;then + tcmd="nc -l ${TSST_PORT}" + else + tcmd="nc -dl ${TSST_PORT}" + fi else tcmd="nc ${REMOTEIP} ${TSST_PORT}" fi @@ -252,6 +263,16 @@ get_footprint() adjust_progress() { + + if [[ ! -x `which pv` ]];then + wsrep_log_error "pv not found in path: $PATH" + wsrep_log_error "Disabling all progress/rate-limiting" + pcmd="" + rlimit="" + progress="" + return + fi + if [[ -n $progress && $progress != '1' ]];then if [[ -e $progress ]];then pcmd+=" 2>>$progress" @@ -281,8 +302,7 @@ read_cnf() progress=$(parse_cnf sst progress "") rebuild=$(parse_cnf sst rebuild 0) ttime=$(parse_cnf sst time 0) - cpat=$(parse_cnf sst cpat '.*galera\.cache$\|.*sst_in_progress$\|.*grastate\.dat$\|.*gvwstate\.dat$\|.*\.err$\|.*\.log$\|.*RPM_UPGRADE_MARKER$\|.*RPM_UPGRADE_HISTORY$') - incremental=$(parse_cnf sst incremental 0) + cpat=$(parse_cnf sst cpat '.*galera\.cache$\|.*sst_in_progress$\|.*\.sst$\|.*gvwstate\.dat$\|.*grastate\.dat$\|.*\.err$\|.*\.log$\|.*RPM_UPGRADE_MARKER$\|.*RPM_UPGRADE_HISTORY$') ealgo=$(parse_cnf xtrabackup encrypt "") ekey=$(parse_cnf xtrabackup encrypt-key "") ekeyfile=$(parse_cnf xtrabackup encrypt-key-file "") @@ -297,12 +317,26 @@ read_cnf() ekeyfile=$(parse_cnf sst encrypt-key-file "") fi rlimit=$(parse_cnf sst rlimit "") - uextra=$(parse_cnf sst use_extra 0) + uextra=$(parse_cnf sst use-extra 0) speciald=$(parse_cnf sst sst-special-dirs 1) iopts=$(parse_cnf sst inno-backup-opts "") iapts=$(parse_cnf sst inno-apply-opts "") impts=$(parse_cnf sst inno-move-opts "") stimeout=$(parse_cnf sst sst-initial-timeout 100) + ssyslog=$(parse_cnf sst sst-syslog 0) + ssystag=$(parse_cnf mysqld_safe syslog-tag "${SST_SYSLOG_TAG:-}") + ssystag+="-" + + if [[ $speciald -eq 0 ]];then + wsrep_log_error "sst-special-dirs equal to 0 is not supported, falling back to 1" + speciald=1 + fi + + if [[ $ssyslog -ne -1 ]];then + if my_print_defaults -c $WSREP_SST_OPT_CONF mysqld_safe | tr '_' '-' | grep -q -- "--syslog";then + ssyslog=1 + fi + fi } get_stream() @@ -346,8 +380,7 @@ cleanup_joiner() local estatus=$? if [[ $estatus -ne 0 ]];then wsrep_log_error "Cleanup after exit with status:$estatus" - fi - if [ "${WSREP_SST_OPT_ROLE}" = "joiner" ];then + elif [ "${WSREP_SST_OPT_ROLE}" = "joiner" ];then wsrep_log_info "Removing the sst_in_progress file" wsrep_cleanup_progress_file fi @@ -358,6 +391,23 @@ cleanup_joiner() if [[ -n ${STATDIR:-} ]];then [[ -d $STATDIR ]] && rm -rf $STATDIR fi + + # Final cleanup + pgid=$(ps -o pgid= $$ | grep -o '[0-9]*') + + # This means no setsid done in mysqld. + # We don't want to kill mysqld here otherwise. + if [[ $$ -eq $pgid ]];then + + # This means a signal was delivered to the process. + # So, more cleanup. + if [[ $estatus -ge 128 ]];then + kill -KILL -$$ || true + fi + + fi + + exit $estatus } check_pid() @@ -398,6 +448,24 @@ cleanup_donor() if [[ -n $itmpdir ]];then [[ -d $itmpdir ]] && rm -rf $itmpdir || true fi + + # Final cleanup + pgid=$(ps -o pgid= $$ | grep -o '[0-9]*') + + # This means no setsid done in mysqld. + # We don't want to kill mysqld here otherwise. + if [[ $$ -eq $pgid ]];then + + # This means a signal was delivered to the process. + # So, more cleanup. + if [[ $estatus -ge 128 ]];then + kill -KILL -$$ || true + fi + + fi + + exit $estatus + } kill_xtrabackup() @@ -414,6 +482,7 @@ setup_ports() SST_PORT=$(echo $WSREP_SST_OPT_ADDR | awk -F '[:/]' '{ print $2 }') REMOTEIP=$(echo $WSREP_SST_OPT_ADDR | awk -F ':' '{ print $1 }') lsn=$(echo $WSREP_SST_OPT_ADDR | awk -F '[:/]' '{ print $4 }') + sst_ver=$(echo $WSREP_SST_OPT_ADDR | awk -F '[:/]' '{ print $5 }') else SST_PORT=$(echo ${WSREP_SST_OPT_ADDR} | awk -F ':' '{ print $2 }') fi @@ -431,11 +500,7 @@ wait_for_listen() ss -p state listening "( sport = :$PORT )" | grep -qE 'socat|nc' && break sleep 0.2 done - if [[ $incremental -eq 1 ]];then - echo "ready ${ADDR}/${MODULE}/$lsn" - else - echo "ready ${ADDR}/${MODULE}" - fi + echo "ready ${ADDR}/${MODULE}//$sst_ver" } check_extra() @@ -443,7 +508,7 @@ check_extra() local use_socket=1 if [[ $uextra -eq 1 ]];then if $MY_PRINT_DEFAULTS -c $WSREP_SST_OPT_CONF mysqld | tr '_' '-' | grep -- "--thread-handling=" | grep -q 'pool-of-threads';then - local eport=$($MY_PRINT_DEFAULTS -c $WSREP_SST_OPT_CONF mysqld | tr '_' '-' | grep -- "--extra-port=" | cut -d= -f2) + local eport=$(my_print_defaults -c $WSREP_SST_OPT_CONF mysqld | tr '_' '-' | grep -- "--extra-port=" | cut -d= -f2) if [[ -n $eport ]];then # Xtrabackup works only locally. # Hence, setting host to 127.0.0.1 unconditionally. @@ -468,8 +533,14 @@ recv_joiner() local dir=$1 local msg=$2 local tmt=$3 + local checkf=$4 local ltcmd + if [[ ! -d ${dir} ]];then + # This indicates that IST is in progress + return + fi + pushd ${dir} 1>/dev/null set +e @@ -477,7 +548,7 @@ recv_joiner() if timeout --help | grep -q -- '-k';then ltcmd="timeout -k $(( tmt+10 )) $tmt $tcmd" else - ltcmd="timeout $tmt $tcmd" + ltcmd="timeout -s9 $tmt $tcmd" fi timeit "$msg" "$ltcmd | $strmcmd; RC=( "\${PIPESTATUS[@]}" )" else @@ -500,7 +571,7 @@ recv_joiner() fi done - if [ ! -r "${MAGIC_FILE}" ];then + if [[ $checkf -eq 1 && ! -r "${MAGIC_FILE}" ]];then # this message should cause joiner to abort wsrep_log_error "xtrabackup process ended without creating '${MAGIC_FILE}'" wsrep_log_info "Contents of datadir" @@ -546,18 +617,52 @@ fi read_cnf setup_ports -get_stream -get_transfer -if ${INNOBACKUPEX_BIN} /tmp --help | grep -- '--version-check' >/dev/null ; then +if ${INNOBACKUPEX_BIN} /tmp --help 2>/dev/null | grep -q -- '--version-check'; then disver="--no-version-check" fi +if [[ ${FORCE_FTWRL:-0} -eq 1 ]];then + wsrep_log_info "Forcing FTWRL due to environment variable FORCE_FTWRL equal to $FORCE_FTWRL" + iopts+=" --no-backup-locks " +fi + INNOEXTRA="" -INNOAPPLY="${INNOBACKUPEX_BIN} $disver $iapts --apply-log \$rebuildcmd \${DATA} &>\${DATA}/innobackup.prepare.log" -INNOMOVE="${INNOBACKUPEX_BIN} --defaults-file=${WSREP_SST_OPT_CONF} $disver $impts --move-back --force-non-empty-directories \${DATA} &>\${DATA}/innobackup.move.log" -INNOBACKUP="${INNOBACKUPEX_BIN} --defaults-file=${WSREP_SST_OPT_CONF} $disver $iopts \$tmpopts \$INNOEXTRA --galera-info --stream=\$sfmt \$itmpdir 2>\${DATA}/innobackup.backup.log" + +if [[ $ssyslog -eq 1 ]];then + + if [[ ! -x `which logger` ]];then + wsrep_log_error "logger not in path: $PATH. Ignoring" + else + + wsrep_log_info "Logging all stderr of SST/Innobackupex to syslog" + + exec 2> >(logger -p daemon.err -t ${ssystag}wsrep-sst-$WSREP_SST_OPT_ROLE) + + wsrep_log_error() + { + logger -p daemon.err -t ${ssystag}wsrep-sst-$WSREP_SST_OPT_ROLE "$@" + } + + wsrep_log_info() + { + logger -p daemon.info -t ${ssystag}wsrep-sst-$WSREP_SST_OPT_ROLE "$@" + } + + INNOAPPLY="${INNOBACKUPEX_BIN} $disver $iapts --apply-log \$rebuildcmd \${DATA} 2>&1 | logger -p daemon.err -t ${ssystag}innobackupex-apply " + INNOMOVE="${INNOBACKUPEX_BIN} --defaults-file=${WSREP_SST_OPT_CONF} $disver $impts --move-back --force-non-empty-directories \${DATA} 2>&1 | logger -p daemon.err -t ${ssystag}innobackupex-move " + INNOBACKUP="${INNOBACKUPEX_BIN} --defaults-file=${WSREP_SST_OPT_CONF} $disver $iopts \$tmpopts \$INNOEXTRA --galera-info --stream=\$sfmt \$itmpdir 2> >(logger -p daemon.err -t ${ssystag}innobackupex-backup)" + fi + +else + INNOAPPLY="${INNOBACKUPEX_BIN} $disver $iapts --apply-log \$rebuildcmd \${DATA} &>\${DATA}/innobackup.prepare.log" + INNOMOVE="${INNOBACKUPEX_BIN} --defaults-file=${WSREP_SST_OPT_CONF} --defaults-group=mysqld${WSREP_SST_OPT_CONF_SUFFIX} $disver $impts --move-back --force-non-empty-directories \${DATA} &>\${DATA}/innobackup.move.log" + INNOBACKUP="${INNOBACKUPEX_BIN} --defaults-file=${WSREP_SST_OPT_CONF} --defaults-group=mysqld${WSREP_SST_OPT_CONF_SUFFIX} $disver $iopts \$tmpopts \$INNOEXTRA --galera-info --stream=\$sfmt \$itmpdir 2>\${DATA}/innobackup.backup.log" +fi + +get_stream +get_transfer if [ "$WSREP_SST_OPT_ROLE" = "donor" ] then @@ -565,6 +670,11 @@ then if [ $WSREP_SST_OPT_BYPASS -eq 0 ] then + if [[ -z $sst_ver ]];then + wsrep_log_error "Upgrade joiner to 5.6.21 or higher for backup locks support" + wsrep_log_error "The joiner is not supported for this version of donor" + exit 93 + fi if [[ -z $(parse_cnf mysqld tmpdir "") && -z $(parse_cnf xtrabackup tmpdir "") ]];then xtmpdir=$(mktemp -d) @@ -596,9 +706,6 @@ then fi fi - if [[ -n $lsn ]];then - INNOEXTRA+=" --incremental --incremental-lsn=$lsn " - fi check_extra @@ -686,29 +793,12 @@ then [[ -e $SST_PROGRESS_FILE ]] && wsrep_log_info "Stale sst_in_progress file: $SST_PROGRESS_FILE" [[ -n $SST_PROGRESS_FILE ]] && touch $SST_PROGRESS_FILE - if [[ $speciald -eq 1 ]];then - wsrep_log_info "WARNING: sst-special-dirs feature requires PXC 2.1.6 or latter." - fi - - if [[ $speciald -eq 1 ]];then - ib_home_dir=$(parse_cnf mysqld innodb-data-home-dir "") - ib_log_dir=$(parse_cnf mysqld innodb-log-group-home-dir "") - if [[ -z $ib_home_dir && -z $ib_log_dir ]];then - speciald=0 - fi - fi + ib_home_dir=$(parse_cnf mysqld innodb-data-home-dir "") + ib_log_dir=$(parse_cnf mysqld innodb-log-group-home-dir "") + ib_undo_dir=$(parse_cnf mysqld innodb-undo-directory "") stagemsg="Joiner-Recv" - if [[ ! -e ${DATA}/ibdata1 ]];then - incremental=0 - fi - - if [[ $incremental -eq 1 ]];then - wsrep_log_info "Incremental SST enabled: NOT SUPPORTED yet" - lsn=$(grep to_lsn xtrabackup_checkpoints | cut -d= -f2 | tr -d ' ') - wsrep_log_info "Recovered LSN: $lsn" - fi sencrypted=1 nthreads=1 @@ -737,12 +827,6 @@ then tcmd+=" | $pcmd" fi - if [[ $incremental -eq 1 ]];then - BDATA=$DATA - DATA=$(mktemp -d) - MAGIC_FILE="${DATA}/${INFO_FILE}" - fi - get_keys if [[ $encrypt -eq 1 && $sencrypted -eq 1 ]];then if [[ -n $sdecomp ]];then @@ -756,7 +840,8 @@ then STATDIR=$(mktemp -d) MAGIC_FILE="${STATDIR}/${INFO_FILE}" - recv_joiner $STATDIR "${stagemsg}-gtid" $stimeout + recv_joiner $STATDIR "${stagemsg}-gtid" $stimeout 1 + if ! ps -p ${WSREP_SST_OPT_PARENT} &>/dev/null then @@ -766,50 +851,48 @@ then if [ ! -r "${STATDIR}/${IST_FILE}" ] then - wsrep_log_info "Proceeding with SST" - if [[ $speciald -eq 1 && -d ${DATA}/.sst ]];then - wsrep_log_info "WARNING: Stale temporary SST directory: ${DATA}/.sst from previous SST" + if [[ -d ${DATA}/.sst ]];then + wsrep_log_info "WARNING: Stale temporary SST directory: ${DATA}/.sst from previous state transfer" fi + mkdir -p ${DATA}/.sst + (recv_joiner $DATA/.sst "${stagemsg}-SST" 0 0) & + jpid=$! + wsrep_log_info "Proceeding with SST" - if [[ $incremental -ne 1 ]];then - if [[ $speciald -eq 1 ]];then - wsrep_log_info "Cleaning the existing datadir and innodb-data/log directories" - find $ib_home_dir $ib_log_dir $DATA -mindepth 1 -regex $cpat -prune -o -exec rm -rfv {} 1>&2 \+ - else - wsrep_log_info "Cleaning the existing datadir" - find $DATA -mindepth 1 -regex $cpat -prune -o -exec rm -rfv {} 1>&2 \+ - fi - tempdir=$(parse_cnf mysqld log-bin "") - if [[ -n ${tempdir:-} ]];then - binlog_dir=$(dirname $tempdir) - binlog_file=$(basename $tempdir) - if [[ -n ${binlog_dir:-} && $binlog_dir != '.' && $binlog_dir != $DATA ]];then - pattern="$binlog_dir/$binlog_file\.[0-9]+$" - wsrep_log_info "Cleaning the binlog directory $binlog_dir as well" - find $binlog_dir -maxdepth 1 -type f -regex $pattern -exec rm -fv {} 1>&2 \+ - rm $binlog_dir/*.index || true - fi - fi - else - wsrep_log_info "Removing existing ib_logfile files" - rm -f ${BDATA}/ib_logfile* + wsrep_log_info "Cleaning the existing datadir and innodb-data/log directories" + find $ib_home_dir $ib_log_dir $ib_undo_dir $DATA -mindepth 1 -regex $cpat -prune -o -exec rm -rfv {} 1>&2 \+ + + tempdir=$(parse_cnf mysqld log-bin "") + if [[ -n ${tempdir:-} ]];then + binlog_dir=$(dirname $tempdir) + binlog_file=$(basename $tempdir) + if [[ -n ${binlog_dir:-} && $binlog_dir != '.' && $binlog_dir != $DATA ]];then + pattern="$binlog_dir/$binlog_file\.[0-9]+$" + wsrep_log_info "Cleaning the binlog directory $binlog_dir as well" + find $binlog_dir -maxdepth 1 -type f -regex $pattern -exec rm -fv {} 1>&2 \+ || true + rm $binlog_dir/*.index || true + fi fi - if [[ $speciald -eq 1 ]];then - mkdir -p ${DATA}/.sst - TDATA=${DATA} - DATA="${DATA}/.sst" - fi + + TDATA=${DATA} + DATA="${DATA}/.sst" MAGIC_FILE="${DATA}/${INFO_FILE}" - recv_joiner $DATA "${stagemsg}-SST" 0 + wsrep_log_info "Waiting for SST streaming to complete!" + wait $jpid get_proc + if [[ ! -s ${DATA}/xtrabackup_checkpoints ]];then + wsrep_log_error "xtrabackup_checkpoints missing, failed innobackupex/SST on donor" + exit 2 + fi + # Rebuild indexes for compact backups if grep -q 'compact = 1' ${DATA}/xtrabackup_checkpoints;then wsrep_log_info "Index compaction detected" @@ -864,12 +947,24 @@ then fi fi - if [[ $incremental -eq 1 ]];then - # Added --ibbackup=xtrabackup_55 because it fails otherwise citing connection issues. - INNOAPPLY="${INNOBACKUPEX_BIN} --defaults-file=${WSREP_SST_OPT_CONF} \ - --ibbackup=xtrabackup_55 --apply-log $rebuildcmd --redo-only $BDATA --incremental-dir=${DATA} &>>${BDATA}/innobackup.prepare.log" + + if [[ ! -z $WSREP_SST_OPT_BINLOG ]];then + + BINLOG_DIRNAME=$(dirname $WSREP_SST_OPT_BINLOG) + BINLOG_FILENAME=$(basename $WSREP_SST_OPT_BINLOG) + + # To avoid comparing data directory and BINLOG_DIRNAME + mv $DATA/${BINLOG_FILENAME}.* $BINLOG_DIRNAME/ 2>/dev/null || true + + pushd $BINLOG_DIRNAME &>/dev/null + for bfiles in $(ls -1 ${BINLOG_FILENAME}.*);do + echo ${BINLOG_DIRNAME}/${bfiles} >> ${BINLOG_FILENAME}.index + done + popd &> /dev/null + fi + wsrep_log_info "Preparing the backup at ${DATA}" timeit "Xtrabackup prepare stage" "$INNOAPPLY" @@ -879,28 +974,22 @@ then exit 22 fi - if [[ $speciald -eq 1 ]];then - MAGIC_FILE="${TDATA}/${INFO_FILE}" - set +e - rm $TDATA/innobackup.prepare.log $TDATA/innobackup.move.log - set -e - wsrep_log_info "Moving the backup to ${TDATA}" - timeit "Xtrabackup move stage" "$INNOMOVE" - if [[ $? -eq 0 ]];then - wsrep_log_info "Move successful, removing ${DATA}" - rm -rf $DATA - DATA=${TDATA} - else - wsrep_log_error "Move failed, keeping ${DATA} for further diagnosis" - wsrep_log_error "Check ${DATA}/innobackup.move.log for details" - fi + MAGIC_FILE="${TDATA}/${INFO_FILE}" + set +e + rm $TDATA/innobackup.prepare.log $TDATA/innobackup.move.log + set -e + wsrep_log_info "Moving the backup to ${TDATA}" + timeit "Xtrabackup move stage" "$INNOMOVE" + if [[ $? -eq 0 ]];then + wsrep_log_info "Move successful, removing ${DATA}" + rm -rf $DATA + DATA=${TDATA} + else + wsrep_log_error "Move failed, keeping ${DATA} for further diagnosis" + wsrep_log_error "Check ${DATA}/innobackup.move.log for details" + exit 22 fi - if [[ $incremental -eq 1 ]];then - wsrep_log_info "Cleaning up ${DATA} after incremental SST" - [[ -d ${DATA} ]] && rm -rf ${DATA} - DATA=$BDATA - fi else wsrep_log_info "${IST_FILE} received from donor: Running IST" @@ -910,6 +999,7 @@ then wsrep_log_error "SST magic file ${MAGIC_FILE} not found/readable" exit 2 fi + wsrep_log_info "Galera co-ords from recovery: $(cat ${MAGIC_FILE})" cat "${MAGIC_FILE}" # output UUID:seqno wsrep_log_info "Total time on joiner: $totime seconds" fi |