diff options
57 files changed, 2159 insertions, 720 deletions
diff --git a/cmake/install_macros.cmake b/cmake/install_macros.cmake index 1d77da2710b..0b20435dbaf 100644 --- a/cmake/install_macros.cmake +++ b/cmake/install_macros.cmake @@ -31,6 +31,9 @@ MACRO (INSTALL_DSYM_DIRECTORIES targets) GET_TARGET_PROPERTY(type ${target} TYPE) # It's a dirty hack, but cmake too stupid and mysql cmake files too buggy */ STRING(REPLACE "liblibmysql.dylib" "libmysqlclient.${SHARED_LIB_MAJOR_VERSION}.dylib" location ${location}) + IF(DEBUG_EXTNAME) + STRING(REGEX REPLACE "/mysqld$" "/mysqld-debug" location ${location}) + ENDIF() IF(type MATCHES "EXECUTABLE" OR type MATCHES "MODULE" OR type MATCHES "SHARED_LIBRARY") INSTALL(DIRECTORY "${location}.dSYM" DESTINATION ${ARG_DESTINATION} COMPONENT Debuginfo) ENDIF() diff --git a/cmake/wsrep.cmake b/cmake/wsrep.cmake index a807a729adf..aabf0020fc2 100644 --- a/cmake/wsrep.cmake +++ b/cmake/wsrep.cmake @@ -18,12 +18,12 @@ # so WSREP_VERSION is produced regardless # Set the patch version -SET(WSREP_PATCH_VERSION "9") +SET(WSREP_PATCH_VERSION "10") # MariaDB addition: Revision number of the last revision merged from # codership branch visible in @@visible_comment. # Branch : codership-mysql/5.5 -SET(WSREP_PATCH_REVNO "3928") # Should be updated on every merge. +SET(WSREP_PATCH_REVNO "3968") # Should be updated on every merge. # MariaDB: Obtain patch revision number: # Update WSREP_PATCH_REVNO if WSREP_REV environment variable is set. diff --git a/include/thr_lock.h b/include/thr_lock.h index 4551a3160ff..f05db666da9 100644 --- a/include/thr_lock.h +++ b/include/thr_lock.h @@ -22,7 +22,7 @@ extern "C" { #endif #ifdef WITH_WSREP #include <my_sys.h> - typedef int (* wsrep_thd_is_brute_force_fun)(void *); + typedef my_bool (* wsrep_thd_is_brute_force_fun)(void *, my_bool); typedef int (* wsrep_abort_thd_fun)(void *, void *, my_bool); typedef int (* wsrep_on_fun)(void *); void wsrep_thr_lock_init( diff --git a/mysys/thr_lock.c b/mysys/thr_lock.c index 4f7c727594b..59c08240bca 100644 --- a/mysys/thr_lock.c +++ b/mysys/thr_lock.c @@ -690,7 +690,7 @@ wsrep_break_lock( { if (wsrep_on(data->owner->mysql_thd) && wsrep_thd_is_brute_force && - wsrep_thd_is_brute_force(data->owner->mysql_thd)) + wsrep_thd_is_brute_force(data->owner->mysql_thd, TRUE)) { THR_LOCK_DATA *holder; @@ -715,7 +715,7 @@ wsrep_break_lock( holder; holder=holder->next) { - if (!wsrep_thd_is_brute_force(holder->owner->mysql_thd)) + if (!wsrep_thd_is_brute_force(holder->owner->mysql_thd, TRUE)) { wsrep_abort_thd(data->owner->mysql_thd, holder->owner->mysql_thd, FALSE); @@ -731,7 +731,7 @@ wsrep_break_lock( holder; holder=holder->next) { - if (!wsrep_thd_is_brute_force(holder->owner->mysql_thd)) + if (!wsrep_thd_is_brute_force(holder->owner->mysql_thd, TRUE)) { wsrep_abort_thd(data->owner->mysql_thd, holder->owner->mysql_thd, FALSE); diff --git a/scripts/CMakeLists.txt b/scripts/CMakeLists.txt index af11b759f4e..d5c20f1fc74 100644 --- a/scripts/CMakeLists.txt +++ b/scripts/CMakeLists.txt @@ -321,7 +321,13 @@ IF(WIN32) ENDFOREACH() ELSE() IF(WITH_WSREP) - SET(WSREP_BINARIES wsrep_sst_common wsrep_sst_mysqldump wsrep_sst_rsync wsrep_sst_xtrabackup) + SET(WSREP_BINARIES + wsrep_sst_common + wsrep_sst_mysqldump + wsrep_sst_rsync + wsrep_sst_xtrabackup + wsrep_sst_xtrabackup-v2 + ) ENDIF() # On Unix, most of the files end up in the bin directory SET(BIN_SCRIPTS diff --git a/scripts/wsrep_sst_common.sh b/scripts/wsrep_sst_common.sh index 30303de0779..f9a08c1c695 100644 --- a/scripts/wsrep_sst_common.sh +++ b/scripts/wsrep_sst_common.sh @@ -28,7 +28,7 @@ case "$1" in shift ;; '--auth') - readonly WSREP_SST_OPT_AUTH="$2" + WSREP_SST_OPT_AUTH="$2" shift ;; '--bypass') @@ -87,13 +87,21 @@ shift done readonly WSREP_SST_OPT_BYPASS -if [ -n "$WSREP_SST_OPT_DATA" ] +# For Bug:1200727 +if my_print_defaults -c $WSREP_SST_OPT_CONF sst | grep -q "wsrep_sst_auth";then + if [ -z $WSREP_SST_OPT_AUTH -o $WSREP_SST_OPT_AUTH = "(null)" ];then + WSREP_SST_OPT_AUTH=$(my_print_defaults -c $WSREP_SST_OPT_CONF sst | grep -- "--wsrep_sst_auth" | cut -d= -f2) + fi +fi + +if [ -n "${WSREP_SST_OPT_DATA:-}" ] then SST_PROGRESS_FILE="$WSREP_SST_OPT_DATA/sst_in_progress" else SST_PROGRESS_FILE="" fi + wsrep_log() { # echo everything to stderr so that it gets into common error log diff --git a/scripts/wsrep_sst_rsync.sh b/scripts/wsrep_sst_rsync.sh index b3535480c67..4b78071542c 100755 --- a/scripts/wsrep_sst_rsync.sh +++ b/scripts/wsrep_sst_rsync.sh @@ -67,12 +67,22 @@ check_pid_and_port() MAGIC_FILE="$WSREP_SST_OPT_DATA/rsync_sst_complete" rm -rf "$MAGIC_FILE" -SCRIPT_DIR=$(cd "$(dirname "$0")"; pwd -P) -WSREP_LOG_DIR=${WSREP_LOG_DIR:-$($SCRIPT_DIR/my_print_defaults --defaults-file "$WSREP_SST_OPT_CONF" mysqld server mysqld-5.5 \ - | grep -- '--innodb[-_]log[-_]group[-_]home[-_]dir=' | cut -b 29- )} -if [ -n "${WSREP_LOG_DIR:-""}" ]; then +WSREP_LOG_DIR=${WSREP_LOG_DIR:-""} + +# if WSREP_LOG_DIR env. variable is not set, try to get it from my.cnf +if [ -z "$WSREP_LOG_DIR" ]; then + SCRIPT_DIR="$(cd $(dirname "$0"); pwd -P)" + WSREP_LOG_DIR=$($SCRIPT_DIR/my_print_defaults --defaults-file \ + "$WSREP_SST_OPT_CONF" mysqld server mysqld-5.5 \ + | grep -- '--innodb[-_]log[-_]group[-_]home[-_]dir=' \ + | cut -b 29- ) +fi + +if [ -n "$WSREP_LOG_DIR" ]; then + # handle both relative and absolute paths WSREP_LOG_DIR=$(cd $WSREP_SST_OPT_DATA; mkdir -p "$WSREP_LOG_DIR"; cd $WSREP_LOG_DIR; pwd -P) else + # default to datadir WSREP_LOG_DIR=$(cd $WSREP_SST_OPT_DATA; pwd -P) fi @@ -158,7 +168,7 @@ then find . -maxdepth 1 -mindepth 1 -type d -print0 | xargs -I{} -0 -P $count \ rsync --owner --group --perms --links --specials \ --ignore-times --inplace --recursive --delete --quiet \ - $WHOLE_FILE_OPT --exclude '*/ib_logfile*' "$WSREP_SST_OPT_DATA"/{}/ \ + $WHOLE_FILE_OPT --exclude '*/ib_logfile*' "$WSREP_SST_OPT_DATA"/{}/ \ rsync://$WSREP_SST_OPT_ADDR/{} >&2 || RC=$? popd >/dev/null @@ -208,8 +218,6 @@ then trap "exit 3" INT TERM ABRT trap cleanup_joiner EXIT - MYUID=$(id -u) - MYGID=$(id -g) RSYNC_CONF="$WSREP_SST_OPT_DATA/$MODULE.conf" cat << EOF > "$RSYNC_CONF" @@ -217,8 +225,6 @@ pid file = $RSYNC_PID use chroot = no read only = no timeout = 300 -uid = $MYUID -gid = $MYGID [$MODULE] path = $WSREP_SST_OPT_DATA [$MODULE-log_dir] diff --git a/scripts/wsrep_sst_xtrabackup-v2.sh b/scripts/wsrep_sst_xtrabackup-v2.sh new file mode 100644 index 00000000000..c296c903007 --- /dev/null +++ b/scripts/wsrep_sst_xtrabackup-v2.sh @@ -0,0 +1,875 @@ +#!/bin/bash -ue +# Copyright (C) 2013 Percona Inc +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; see the file COPYING. If not, write to the +# Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston +# MA 02110-1301 USA. + +# Documentation: http://www.percona.com/doc/percona-xtradb-cluster/manual/xtrabackup_sst.html +# Make sure to read that before proceeding! + + + + +. $(dirname $0)/wsrep_sst_common + +ealgo="" +ekey="" +ekeyfile="" +encrypt=0 +nproc=1 +ecode=0 +XTRABACKUP_PID="" +SST_PORT="" +REMOTEIP="" +tcert="" +tpem="" +tkey="" +sockopt="" +progress="" +ttime=0 +totime=0 +lsn="" +incremental=0 +ecmd="" +rlimit="" +# Initially +stagemsg="${WSREP_SST_OPT_ROLE}" +cpat="" +speciald=0 +ib_home_dir="" +ib_log_dir="" + +sfmt="tar" +strmcmd="" +tfmt="" +tcmd="" +rebuild=0 +rebuildcmd="" +payload=0 +pvformat="-F '%N => Rate:%r Avg:%a Elapsed:%t %e Bytes: %b %p' " +pvopts="-f -i 10 -N $WSREP_SST_OPT_ROLE " +STATDIR="" +uextra=0 +disver="" + +scomp="" +sdecomp="" + +if which pv &>/dev/null && pv --help | grep -q FORMAT;then + pvopts+=$pvformat +fi +pcmd="pv $pvopts" +declare -a RC + +INNOBACKUPEX_BIN=innobackupex +readonly AUTH=(${WSREP_SST_OPT_AUTH//:/ }) +DATA="${WSREP_SST_OPT_DATA}" +INFO_FILE="xtrabackup_galera_info" +IST_FILE="xtrabackup_ist" +MAGIC_FILE="${DATA}/${INFO_FILE}" + +# Setting the path for ss and ip +export PATH="/usr/sbin:/sbin:$PATH" + +timeit(){ + local stage=$1 + shift + local cmd="$@" + local x1 x2 took extcode + + if [[ $ttime -eq 1 ]];then + x1=$(date +%s) + wsrep_log_info "Evaluating $cmd" + eval "$cmd" + extcode=$? + x2=$(date +%s) + took=$(( x2-x1 )) + wsrep_log_info "NOTE: $stage took $took seconds" + totime=$(( totime+took )) + else + wsrep_log_info "Evaluating $cmd" + eval "$cmd" + extcode=$? + fi + return $extcode +} + +get_keys() +{ + # $encrypt -eq 1 is for internal purposes only + if [[ $encrypt -ge 2 || $encrypt -eq -1 ]];then + return + fi + + if [[ $encrypt -eq 0 ]];then + if my_print_defaults -c $WSREP_SST_OPT_CONF xtrabackup | grep -q encrypt;then + wsrep_log_error "Unexpected option combination. SST may fail. Refer to http://www.percona.com/doc/percona-xtradb-cluster/manual/xtrabackup_sst.html " + fi + return + fi + + if [[ $sfmt == 'tar' ]];then + wsrep_log_info "NOTE: Xtrabackup-based encryption - encrypt=1 - cannot be enabled with tar format" + encrypt=-1 + return + fi + + wsrep_log_info "Xtrabackup based encryption enabled in my.cnf - Supported only from Xtrabackup 2.1.4" + + if [[ -z $ealgo ]];then + wsrep_log_error "FATAL: Encryption algorithm empty from my.cnf, bailing out" + exit 3 + fi + + if [[ -z $ekey && ! -r $ekeyfile ]];then + wsrep_log_error "FATAL: Either key or keyfile must be readable" + exit 3 + fi + + if [[ -z $ekey ]];then + ecmd="xbcrypt --encrypt-algo=$ealgo --encrypt-key-file=$ekeyfile" + else + ecmd="xbcrypt --encrypt-algo=$ealgo --encrypt-key=$ekey" + fi + + if [[ "$WSREP_SST_OPT_ROLE" == "joiner" ]];then + ecmd+=" -d" + fi + + stagemsg+="-XB-Encrypted" +} + +get_transfer() +{ + if [[ -z $SST_PORT ]];then + TSST_PORT=4444 + else + TSST_PORT=$SST_PORT + fi + + if [[ $tfmt == 'nc' ]];then + if [[ ! -x `which nc` ]];then + wsrep_log_error "nc(netcat) not found in path: $PATH" + exit 2 + fi + wsrep_log_info "Using netcat as streamer" + if [[ "$WSREP_SST_OPT_ROLE" == "joiner" ]];then + tcmd="nc -dl ${TSST_PORT}" + else + tcmd="nc ${REMOTEIP} ${TSST_PORT}" + fi + else + tfmt='socat' + wsrep_log_info "Using socat as streamer" + if [[ ! -x `which socat` ]];then + wsrep_log_error "socat not found in path: $PATH" + exit 2 + fi + + if [[ $encrypt -eq 2 || $encrypt -eq 3 ]] && ! socat -V | grep -q WITH_OPENSSL;then + wsrep_log_info "NOTE: socat is not openssl enabled, falling back to plain transfer" + encrypt=-1 + fi + + if [[ $encrypt -eq 2 ]];then + wsrep_log_info "Using openssl based encryption with socat: with crt and pem" + if [[ -z $tpem || -z $tcert ]];then + wsrep_log_error "Both PEM and CRT files required" + exit 22 + fi + stagemsg+="-OpenSSL-Encrypted-2" + if [[ "$WSREP_SST_OPT_ROLE" == "joiner" ]];then + wsrep_log_info "Decrypting with PEM $tpem, CA: $tcert" + tcmd="socat -u openssl-listen:${TSST_PORT},reuseaddr,cert=$tpem,cafile=${tcert}${sockopt} stdio" + else + wsrep_log_info "Encrypting with PEM $tpem, CA: $tcert" + tcmd="socat -u stdio openssl-connect:${REMOTEIP}:${TSST_PORT},cert=$tpem,cafile=${tcert}${sockopt}" + fi + elif [[ $encrypt -eq 3 ]];then + wsrep_log_info "Using openssl based encryption with socat: with key and crt" + if [[ -z $tpem || -z $tkey ]];then + wsrep_log_error "Both certificate and key files required" + exit 22 + fi + stagemsg+="-OpenSSL-Encrypted-3" + if [[ "$WSREP_SST_OPT_ROLE" == "joiner" ]];then + wsrep_log_info "Decrypting with certificate $tpem, key $tkey" + tcmd="socat -u openssl-listen:${TSST_PORT},reuseaddr,cert=$tpem,key=${tkey},verify=0${sockopt} stdio" + else + wsrep_log_info "Encrypting with certificate $tpem, key $tkey" + tcmd="socat -u stdio openssl-connect:${REMOTEIP}:${TSST_PORT},cert=$tpem,key=${tkey},verify=0${sockopt}" + fi + + else + if [[ "$WSREP_SST_OPT_ROLE" == "joiner" ]];then + tcmd="socat -u TCP-LISTEN:${TSST_PORT},reuseaddr${sockopt} stdio" + else + tcmd="socat -u stdio TCP:${REMOTEIP}:${TSST_PORT}${sockopt}" + fi + fi + fi + +} + +parse_cnf() +{ + local group=$1 + local var=$2 + reval=$(my_print_defaults -c $WSREP_SST_OPT_CONF $group | awk -F= '{if ($1 ~ /_/) { gsub(/_/,"-",$1); print $1"="$2 } else { print $0 }}' | grep -- "--$var=" | cut -d= -f2-) + if [[ -z $reval ]];then + [[ -n $3 ]] && reval=$3 + fi + echo $reval +} + +get_footprint() +{ + pushd $WSREP_SST_OPT_DATA 1>/dev/null + payload=$(du --block-size=1 -c **/*.ibd **/*.MYI **/*.MYI ibdata1 | awk 'END { print $1 }') + if my_print_defaults -c $WSREP_SST_OPT_CONF xtrabackup | grep -q -- "--compress";then + # QuickLZ has around 50% compression ratio + # When compression/compaction used, the progress is only an approximate. + payload=$(( payload*1/2 )) + fi + popd 1>/dev/null + pcmd+=" -s $payload" + adjust_progress +} + +adjust_progress() +{ + if [[ -n $progress && $progress != '1' ]];then + if [[ -e $progress ]];then + pcmd+=" 2>>$progress" + else + pcmd+=" 2>$progress" + fi + elif [[ -z $progress && -n $rlimit ]];then + # When rlimit is non-zero + pcmd="pv -q" + fi + + if [[ -n $rlimit && "$WSREP_SST_OPT_ROLE" == "donor" ]];then + wsrep_log_info "Rate-limiting SST to $rlimit" + pcmd+=" -L \$rlimit" + fi +} + +read_cnf() +{ + sfmt=$(parse_cnf sst streamfmt "xbstream") + tfmt=$(parse_cnf sst transferfmt "socat") + tcert=$(parse_cnf sst tca "") + tpem=$(parse_cnf sst tcert "") + tkey=$(parse_cnf sst tkey "") + encrypt=$(parse_cnf sst encrypt 0) + sockopt=$(parse_cnf sst sockopt "") + progress=$(parse_cnf sst progress "") + rebuild=$(parse_cnf sst rebuild 0) + ttime=$(parse_cnf sst time 0) + cpat=$(parse_cnf sst cpat '.*galera\.cache$\|.*sst_in_progress$\|.*grastate\.dat$\|.*\.err$\|.*\.log$\|.*RPM_UPGRADE_MARKER$\|.*RPM_UPGRADE_HISTORY$') + incremental=$(parse_cnf sst incremental 0) + ealgo=$(parse_cnf xtrabackup encrypt "") + ekey=$(parse_cnf xtrabackup encrypt-key "") + ekeyfile=$(parse_cnf xtrabackup encrypt-key-file "") + scomp=$(parse_cnf sst compressor "") + sdecomp=$(parse_cnf sst decompressor "") + + + # Refer to http://www.percona.com/doc/percona-xtradb-cluster/manual/xtrabackup_sst.html + if [[ -z $ealgo ]];then + ealgo=$(parse_cnf sst encrypt-algo "") + ekey=$(parse_cnf sst encrypt-key "") + ekeyfile=$(parse_cnf sst encrypt-key-file "") + fi + rlimit=$(parse_cnf sst rlimit "") + uextra=$(parse_cnf sst use-extra 0) + speciald=$(parse_cnf sst sst-special-dirs 1) + iopts=$(parse_cnf sst inno-backup-opts "") + iapts=$(parse_cnf sst inno-apply-opts "") + impts=$(parse_cnf sst inno-move-opts "") +} + +get_stream() +{ + if [[ $sfmt == 'xbstream' ]];then + wsrep_log_info "Streaming with xbstream" + if [[ "$WSREP_SST_OPT_ROLE" == "joiner" ]];then + strmcmd="xbstream -x" + else + strmcmd="xbstream -c \${INFO_FILE}" + fi + else + sfmt="tar" + wsrep_log_info "Streaming with tar" + if [[ "$WSREP_SST_OPT_ROLE" == "joiner" ]];then + strmcmd="tar xfi - " + else + strmcmd="tar cf - \${INFO_FILE} " + fi + + fi +} + +get_proc() +{ + set +e + nproc=$(grep -c processor /proc/cpuinfo) + [[ -z $nproc || $nproc -eq 0 ]] && nproc=1 + set -e +} + +sig_joiner_cleanup() +{ + wsrep_log_error "Removing $MAGIC_FILE file due to signal" + rm -f "$MAGIC_FILE" +} + +cleanup_joiner() +{ + # Since this is invoked just after exit NNN + local estatus=$? + if [[ $estatus -ne 0 ]];then + wsrep_log_error "Cleanup after exit with status:$estatus" + fi + if [ "${WSREP_SST_OPT_ROLE}" = "joiner" ];then + wsrep_log_info "Removing the sst_in_progress file" + wsrep_cleanup_progress_file + fi + if [[ -n $progress && -p $progress ]];then + wsrep_log_info "Cleaning up fifo file $progress" + rm $progress + fi + if [[ -n ${STATDIR:-} ]];then + [[ -d $STATDIR ]] && rm -rf $STATDIR + fi +} + +check_pid() +{ + local pid_file="$1" + [ -r "$pid_file" ] && ps -p $(cat "$pid_file") >/dev/null 2>&1 +} + +cleanup_donor() +{ + # Since this is invoked just after exit NNN + local estatus=$? + if [[ $estatus -ne 0 ]];then + wsrep_log_error "Cleanup after exit with status:$estatus" + fi + + if [[ -n $XTRABACKUP_PID ]];then + if check_pid $XTRABACKUP_PID + then + wsrep_log_error "xtrabackup process is still running. Killing... " + kill_xtrabackup + fi + + rm -f $XTRABACKUP_PID + fi + rm -f ${DATA}/${IST_FILE} + + if [[ -n $progress && -p $progress ]];then + wsrep_log_info "Cleaning up fifo file $progress" + rm $progress + fi +} + +kill_xtrabackup() +{ + local PID=$(cat $XTRABACKUP_PID) + [ -n "$PID" -a "0" != "$PID" ] && kill $PID && (kill $PID && kill -9 $PID) || : + rm -f "$XTRABACKUP_PID" +} + +setup_ports() +{ + if [[ "$WSREP_SST_OPT_ROLE" == "donor" ]];then + SST_PORT=$(echo $WSREP_SST_OPT_ADDR | awk -F '[:/]' '{ print $2 }') + REMOTEIP=$(echo $WSREP_SST_OPT_ADDR | awk -F ':' '{ print $1 }') + lsn=$(echo $WSREP_SST_OPT_ADDR | awk -F '[:/]' '{ print $4 }') + else + SST_PORT=$(echo ${WSREP_SST_OPT_ADDR} | awk -F ':' '{ print $2 }') + fi +} + +# waits ~10 seconds for nc to open the port and then reports ready +# (regardless of timeout) +wait_for_listen() +{ + local PORT=$1 + local ADDR=$2 + local MODULE=$3 + for i in {1..50} + do + ss -p state listening "( sport = :$PORT )" | grep -qE 'socat|nc' && break + sleep 0.2 + done + if [[ $incremental -eq 1 ]];then + echo "ready ${ADDR}/${MODULE}/$lsn" + else + echo "ready ${ADDR}/${MODULE}" + fi +} + +check_extra() +{ + local use_socket=1 + if [[ $uextra -eq 1 ]];then + if my_print_defaults -c $WSREP_SST_OPT_CONF mysqld | tr '_' '-' | grep -- "--thread-handling=" | grep -q 'pool-of-threads';then + local eport=$(my_print_defaults -c $WSREP_SST_OPT_CONF mysqld | tr '_' '-' | grep -- "--extra-port=" | cut -d= -f2) + if [[ -n $eport ]];then + # Xtrabackup works only locally. + # Hence, setting host to 127.0.0.1 unconditionally. + wsrep_log_info "SST through extra_port $eport" + INNOEXTRA+=" --host=127.0.0.1 --port=$eport " + use_socket=0 + else + wsrep_log_error "Extra port $eport null, failing" + exit 1 + fi + else + wsrep_log_info "Thread pool not set, ignore the option use_extra" + fi + fi + if [[ $use_socket -eq 1 ]] && [[ -n "${WSREP_SST_OPT_SOCKET}" ]];then + INNOEXTRA+=" --socket=${WSREP_SST_OPT_SOCKET}" + fi +} + +recv_joiner() +{ + local dir=$1 + local msg=$2 + + pushd ${dir} 1>/dev/null + set +e + timeit "$msg" "$tcmd | $strmcmd; RC=( "\${PIPESTATUS[@]}" )" + set -e + popd 1>/dev/null + + + for ecode in "${RC[@]}";do + if [[ $ecode -ne 0 ]];then + wsrep_log_error "Error while getting data from donor node: " \ + "exit codes: ${RC[@]}" + exit 32 + fi + done + + if [ ! -r "${MAGIC_FILE}" ];then + # this message should cause joiner to abort + wsrep_log_error "xtrabackup process ended without creating '${MAGIC_FILE}'" + wsrep_log_info "Contents of datadir" + wsrep_log_info "$(ls -l ${dir}/*)" + exit 32 + fi +} + + +send_donor() +{ + local dir=$1 + local msg=$2 + + pushd ${dir} 1>/dev/null + set +e + timeit "$msg" "$strmcmd | $tcmd; RC=( "\${PIPESTATUS[@]}" )" + set -e + popd 1>/dev/null + + + for ecode in "${RC[@]}";do + if [[ $ecode -ne 0 ]];then + wsrep_log_error "Error while getting data from donor node: " \ + "exit codes: ${RC[@]}" + exit 32 + fi + done + +} + +if [[ ! -x `which innobackupex` ]];then + wsrep_log_error "innobackupex not in path: $PATH" + exit 2 +fi + +rm -f "${MAGIC_FILE}" + +if [[ ! ${WSREP_SST_OPT_ROLE} == 'joiner' && ! ${WSREP_SST_OPT_ROLE} == 'donor' ]];then + wsrep_log_error "Invalid role ${WSREP_SST_OPT_ROLE}" + exit 22 +fi + +read_cnf +setup_ports +get_stream +get_transfer + +if ${INNOBACKUPEX_BIN} /tmp --help | grep -q -- '--version-check'; then + disver="--no-version-check" +fi + + +INNOEXTRA="" +INNOAPPLY="${INNOBACKUPEX_BIN} $disver $iapts --apply-log \$rebuildcmd \${DATA} &>\${DATA}/innobackup.prepare.log" +INNOMOVE="${INNOBACKUPEX_BIN} --defaults-file=${WSREP_SST_OPT_CONF} $disver $impts --move-back --force-non-empty-directories \${DATA} &>\${DATA}/innobackup.move.log" +INNOBACKUP="${INNOBACKUPEX_BIN} --defaults-file=${WSREP_SST_OPT_CONF} $disver $iopts \$INNOEXTRA --galera-info --stream=\$sfmt \${TMPDIR} 2>\${DATA}/innobackup.backup.log" + +if [ "$WSREP_SST_OPT_ROLE" = "donor" ] +then + trap cleanup_donor EXIT + + if [ $WSREP_SST_OPT_BYPASS -eq 0 ] + then + + TMPDIR="${TMPDIR:-/tmp}" + + if [ "${AUTH[0]}" != "(null)" ]; then + INNOEXTRA+=" --user=${AUTH[0]}" + fi + + if [ ${#AUTH[*]} -eq 2 ]; then + INNOEXTRA+=" --password=${AUTH[1]}" + elif [ "${AUTH[0]}" != "(null)" ]; then + # Empty password, used for testing, debugging etc. + INNOEXTRA+=" --password=" + fi + + get_keys + if [[ $encrypt -eq 1 ]];then + if [[ -n $ekey ]];then + INNOEXTRA+=" --encrypt=$ealgo --encrypt-key=$ekey " + else + INNOEXTRA+=" --encrypt=$ealgo --encrypt-key-file=$ekeyfile " + fi + fi + + if [[ -n $lsn ]];then + INNOEXTRA+=" --incremental --incremental-lsn=$lsn " + fi + + check_extra + + wsrep_log_info "Streaming GTID file before SST" + + echo "${WSREP_SST_OPT_GTID}" > "${MAGIC_FILE}" + + ttcmd="$tcmd" + + if [[ $encrypt -eq 1 ]];then + if [[ -n $scomp ]];then + tcmd=" $ecmd | $scomp | $tcmd " + else + tcmd=" $ecmd | $tcmd " + fi + elif [[ -n $scomp ]];then + tcmd=" $scomp | $tcmd " + fi + + + send_donor $DATA "${stagemsg}-gtid" + + tcmd="$ttcmd" + if [[ -n $progress ]];then + get_footprint + tcmd="$pcmd | $tcmd" + elif [[ -n $rlimit ]];then + adjust_progress + tcmd="$pcmd | $tcmd" + fi + + wsrep_log_info "Sleeping before data transfer for SST" + sleep 10 + + wsrep_log_info "Streaming the backup to joiner at ${REMOTEIP} ${SST_PORT:-4444}" + + if [[ -n $scomp ]];then + tcmd="$scomp | $tcmd" + fi + + set +e + timeit "${stagemsg}-SST" "$INNOBACKUP | $tcmd; RC=( "\${PIPESTATUS[@]}" )" + set -e + + if [ ${RC[0]} -ne 0 ]; then + wsrep_log_error "${INNOBACKUPEX_BIN} finished with error: ${RC[0]}. " \ + "Check ${DATA}/innobackup.backup.log" + exit 22 + elif [[ ${RC[$(( ${#RC[@]}-1 ))]} -eq 1 ]];then + wsrep_log_error "$tcmd finished with error: ${RC[1]}" + exit 22 + fi + + # innobackupex implicitly writes PID to fixed location in ${TMPDIR} + XTRABACKUP_PID="${TMPDIR}/xtrabackup_pid" + + + else # BYPASS FOR IST + + wsrep_log_info "Bypassing the SST for IST" + echo "continue" # now server can resume updating data + echo "${WSREP_SST_OPT_GTID}" > "${MAGIC_FILE}" + echo "1" > "${DATA}/${IST_FILE}" + get_keys + if [[ $encrypt -eq 1 ]];then + if [[ -n $scomp ]];then + tcmd=" $ecmd | $scomp | $tcmd " + else + tcmd=" $ecmd | $tcmd " + fi + elif [[ -n $scomp ]];then + tcmd=" $scomp | $tcmd " + fi + strmcmd+=" \${IST_FILE}" + + send_donor $DATA "${stagemsg}-IST" + + fi + + echo "done ${WSREP_SST_OPT_GTID}" + wsrep_log_info "Total time on donor: $totime seconds" + +elif [ "${WSREP_SST_OPT_ROLE}" = "joiner" ] +then + [[ -e $SST_PROGRESS_FILE ]] && wsrep_log_info "Stale sst_in_progress file: $SST_PROGRESS_FILE" + [[ -n $SST_PROGRESS_FILE ]] && touch $SST_PROGRESS_FILE + + if [[ $speciald -eq 1 ]];then + ib_home_dir=$(parse_cnf mysqld innodb-data-home-dir "") + ib_log_dir=$(parse_cnf mysqld innodb-log-group-home-dir "") + if [[ -z $ib_home_dir && -z $ib_log_dir ]];then + speciald=0 + fi + fi + + stagemsg="Joiner-Recv" + + if [[ ! -e ${DATA}/ibdata1 ]];then + incremental=0 + fi + + if [[ $incremental -eq 1 ]];then + wsrep_log_info "Incremental SST enabled: NOT SUPPORTED yet" + lsn=$(grep to_lsn xtrabackup_checkpoints | cut -d= -f2 | tr -d ' ') + wsrep_log_info "Recovered LSN: $lsn" + fi + + sencrypted=1 + nthreads=1 + + MODULE="xtrabackup_sst" + + rm -f "${DATA}/${IST_FILE}" + + # May need xtrabackup_checkpoints later on + rm -f ${DATA}/xtrabackup_binary ${DATA}/xtrabackup_galera_info ${DATA}/xtrabackup_logfile + + ADDR=${WSREP_SST_OPT_ADDR} + if [ -z "${SST_PORT}" ] + then + SST_PORT=4444 + ADDR="$(echo ${WSREP_SST_OPT_ADDR} | awk -F ':' '{ print $1 }'):${SST_PORT}" + fi + + wait_for_listen ${SST_PORT} ${ADDR} ${MODULE} & + + trap sig_joiner_cleanup HUP PIPE INT TERM + trap cleanup_joiner EXIT + + if [[ -n $progress ]];then + adjust_progress + tcmd+=" | $pcmd" + fi + + if [[ $incremental -eq 1 ]];then + BDATA=$DATA + DATA=$(mktemp -d) + MAGIC_FILE="${DATA}/${INFO_FILE}" + fi + + get_keys + if [[ $encrypt -eq 1 && $sencrypted -eq 1 ]];then + if [[ -n $sdecomp ]];then + strmcmd=" $sdecomp | $ecmd | $strmcmd" + else + strmcmd=" $ecmd | $strmcmd" + fi + elif [[ -n $sdecomp ]];then + strmcmd=" $sdecomp | $strmcmd" + fi + + STATDIR=$(mktemp -d) + MAGIC_FILE="${STATDIR}/${INFO_FILE}" + recv_joiner $STATDIR "${stagemsg}-gtid" 1 + + if ! ps -p ${WSREP_SST_OPT_PARENT} &>/dev/null + then + wsrep_log_error "Parent mysqld process (PID:${WSREP_SST_OPT_PARENT}) terminated unexpectedly." + exit 32 + fi + + if [ ! -r "${STATDIR}/${IST_FILE}" ] + then + wsrep_log_info "Proceeding with SST" + + if [[ $speciald -eq 1 && -d ${DATA}/.sst ]];then + wsrep_log_info "WARNING: Stale temporary SST directory: ${DATA}/.sst from previous SST" + fi + + if [[ $incremental -ne 1 ]];then + if [[ $speciald -eq 1 ]];then + wsrep_log_info "Cleaning the existing datadir and innodb-data/log directories" + find $ib_home_dir $ib_log_dir $DATA -mindepth 1 -regex $cpat -prune -o -exec rm -rfv {} 1>&2 \+ + else + wsrep_log_info "Cleaning the existing datadir" + find $DATA -mindepth 1 -regex $cpat -prune -o -exec rm -rfv {} 1>&2 \+ + fi + tempdir=$(parse_cnf mysqld log-bin "") + if [[ -n ${tempdir:-} ]];then + binlog_dir=$(dirname $tempdir) + binlog_file=$(basename $tempdir) + if [[ -n ${binlog_dir:-} && $binlog_dir != '.' && $binlog_dir != $DATA ]];then + pattern="$binlog_dir/$binlog_file\.[0-9]+$" + wsrep_log_info "Cleaning the binlog directory $binlog_dir as well" + find $binlog_dir -maxdepth 1 -type f -regex $pattern -exec rm -fv {} 1>&2 \+ + rm $binlog_dir/*.index || true + fi + fi + + else + wsrep_log_info "Removing existing ib_logfile files" + rm -f ${BDATA}/ib_logfile* + fi + + + if [[ $speciald -eq 1 ]];then + mkdir -p ${DATA}/.sst + TDATA=${DATA} + DATA="${DATA}/.sst" + fi + + + MAGIC_FILE="${DATA}/${INFO_FILE}" + recv_joiner $DATA "${stagemsg}-SST" 0 + + get_proc + + # Rebuild indexes for compact backups + if grep -q 'compact = 1' ${DATA}/xtrabackup_checkpoints;then + wsrep_log_info "Index compaction detected" + rebuild=1 + fi + + if [[ $rebuild -eq 1 ]];then + nthreads=$(parse_cnf xtrabackup rebuild-threads $nproc) + wsrep_log_info "Rebuilding during prepare with $nthreads threads" + rebuildcmd="--rebuild-indexes --rebuild-threads=$nthreads" + fi + + if test -n "$(find ${DATA} -maxdepth 1 -type f -name '*.qp' -print -quit)";then + + wsrep_log_info "Compressed qpress files found" + + if [[ ! -x `which qpress` ]];then + wsrep_log_error "qpress not found in path: $PATH" + exit 22 + fi + + if [[ -n $progress ]] && pv --help | grep -q 'line-mode';then + count=$(find ${DATA} -type f -name '*.qp' | wc -l) + count=$(( count*2 )) + if pv --help | grep -q FORMAT;then + pvopts="-f -s $count -l -N Decompression -F '%N => Rate:%r Elapsed:%t %e Progress: [%b/$count]'" + else + pvopts="-f -s $count -l -N Decompression" + fi + pcmd="pv $pvopts" + adjust_progress + dcmd="$pcmd | xargs -n 2 qpress -T${nproc}d" + else + dcmd="xargs -n 2 qpress -T${nproc}d" + fi + + + # Decompress the qpress files + wsrep_log_info "Decompression with $nproc threads" + timeit "Joiner-Decompression" "find ${DATA} -type f -name '*.qp' -printf '%p\n%h\n' | $dcmd" + extcode=$? + + if [[ $extcode -eq 0 ]];then + wsrep_log_info "Removing qpress files after decompression" + find ${DATA} -type f -name '*.qp' -delete + if [[ $? -ne 0 ]];then + wsrep_log_error "Something went wrong with deletion of qpress files. Investigate" + fi + else + wsrep_log_error "Decompression failed. Exit code: $extcode" + exit 22 + fi + fi + + if [[ $incremental -eq 1 ]];then + # Added --ibbackup=xtrabackup_55 because it fails otherwise citing connection issues. + INNOAPPLY="${INNOBACKUPEX_BIN} $disver --defaults-file=${WSREP_SST_OPT_CONF} \ + --ibbackup=xtrabackup_55 --apply-log $rebuildcmd --redo-only $BDATA --incremental-dir=${DATA} &>>${BDATA}/innobackup.prepare.log" + fi + + wsrep_log_info "Preparing the backup at ${DATA}" + timeit "Xtrabackup prepare stage" "$INNOAPPLY" + + if [ $? -ne 0 ]; + then + wsrep_log_error "${INNOBACKUPEX_BIN} apply finished with errors. Check ${DATA}/innobackup.prepare.log" + exit 22 + fi + + if [[ $speciald -eq 1 ]];then + MAGIC_FILE="${TDATA}/${INFO_FILE}" + set +e + rm $TDATA/innobackup.prepare.log $TDATA/innobackup.move.log + set -e + wsrep_log_info "Moving the backup to ${TDATA}" + timeit "Xtrabackup move stage" "$INNOMOVE" + if [[ $? -eq 0 ]];then + wsrep_log_info "Move successful, removing ${DATA}" + rm -rf $DATA + DATA=${TDATA} + else + wsrep_log_error "Move failed, keeping ${DATA} for further diagnosis" + wsrep_log_error "Check ${DATA}/innobackup.move.log for details" + fi + fi + + if [[ $incremental -eq 1 ]];then + wsrep_log_info "Cleaning up ${DATA} after incremental SST" + [[ -d ${DATA} ]] && rm -rf ${DATA} + DATA=$BDATA + fi + + else + wsrep_log_info "${IST_FILE} received from donor: Running IST" + fi + + if [[ ! -r ${MAGIC_FILE} ]];then + wsrep_log_error "SST magic file ${MAGIC_FILE} not found/readable" + exit 2 + fi + + cat "${MAGIC_FILE}" # output UUID:seqno + wsrep_log_info "Total time on joiner: $totime seconds" +fi + +exit 0 diff --git a/scripts/wsrep_sst_xtrabackup.sh b/scripts/wsrep_sst_xtrabackup.sh index 3eb747f50c3..33f2f07946e 100644 --- a/scripts/wsrep_sst_xtrabackup.sh +++ b/scripts/wsrep_sst_xtrabackup.sh @@ -34,6 +34,11 @@ # lp:1193240 requires you to manually cleanup the directory prior to SST # # # ############################################################################################################# +# Optional dependencies and options documented here: http://www.percona.com/doc/percona-xtradb-cluster/manual/xtrabackup_sst.html +# Make sure to read that before proceeding! + + + . $(dirname $0)/wsrep_sst_common @@ -44,28 +49,89 @@ encrypt=0 nproc=1 ecode=0 XTRABACKUP_PID="" +SST_PORT="" +REMOTEIP="" +tcert="" +tpem="" +sockopt="" +progress="" +ttime=0 +totime=0 +lsn="" +incremental=0 +ecmd="" +rlimit="" sfmt="tar" strmcmd="" +tfmt="" +tcmd="" +rebuild=0 +rebuildcmd="" +payload=0 +pvformat="-F '%N => Rate:%r Avg:%a Elapsed:%t %e Bytes: %b %p' " +pvopts="-f -i 10 -N $WSREP_SST_OPT_ROLE " +uextra=0 + +if which pv &>/dev/null && pv --help | grep -q FORMAT;then + pvopts+=$pvformat +fi +pcmd="pv $pvopts" declare -a RC +INNOBACKUPEX_BIN=innobackupex +readonly AUTH=(${WSREP_SST_OPT_AUTH//:/ }) +DATA="${WSREP_SST_OPT_DATA}" +INFO_FILE="xtrabackup_galera_info" +IST_FILE="xtrabackup_ist" +MAGIC_FILE="${DATA}/${INFO_FILE}" + +# Setting the path for ss and ip +export PATH="/usr/sbin:/sbin:$PATH" + +timeit(){ + local stage=$1 + shift + local cmd="$@" + local x1 x2 took extcode + + if [[ $ttime -eq 1 ]];then + x1=$(date +%s) + wsrep_log_info "Evaluating $cmd" + eval "$cmd" + extcode=$? + x2=$(date +%s) + took=$(( x2-x1 )) + wsrep_log_info "NOTE: $stage took $took seconds" + totime=$(( totime+took )) + else + wsrep_log_info "Evaluating $cmd" + eval "$cmd" + extcode=$? + fi + return $extcode +} + get_keys() { - # There is no metadata in the stream to indicate that it is encrypted - # So, if the cnf file on joiner contains 'encrypt' under [xtrabackup] section then - # it means encryption is being used - if ! my_print_defaults -c $WSREP_SST_OPT_CONF xtrabackup | grep -q encrypt; then + if [[ $encrypt -eq 2 ]];then + return + fi + + if [[ $encrypt -eq 0 ]];then + if my_print_defaults -c $WSREP_SST_OPT_CONF xtrabackup | grep -q encrypt;then + wsrep_log_error "Unexpected option combination. SST may fail. Refer to http://www.percona.com/doc/percona-xtradb-cluster/manual/xtrabackup_sst.html " + fi return fi + if [[ $sfmt == 'tar' ]];then - wsrep_log_info "NOTE: Encryption cannot be enabled with tar format" + wsrep_log_info "NOTE: Xtrabackup-based encryption - encrypt=1 - cannot be enabled with tar format" + encrypt=0 return fi - wsrep_log_info "Encryption enabled in my.cnf - not supported at the moment - Bug in Xtrabackup - lp:1190343" - ealgo=$(my_print_defaults -c $WSREP_SST_OPT_CONF xtrabackup | grep -- '--encrypt=' | cut -d= -f2) - ekey=$(my_print_defaults -c $WSREP_SST_OPT_CONF xtrabackup | grep -- '--encrypt-key=' | cut -d= -f2) - ekeyfile=$(my_print_defaults -c $WSREP_SST_OPT_CONF xtrabackup | grep -- '--encrypt-key-file=' | cut -d= -f2) + wsrep_log_info "Xtrabackup based encryption enabled in my.cnf - Supported only from Xtrabackup 2.1.4" if [[ -z $ealgo ]];then wsrep_log_error "FATAL: Encryption algorithm empty from my.cnf, bailing out" @@ -76,35 +142,160 @@ get_keys() wsrep_log_error "FATAL: Either key or keyfile must be readable" exit 3 fi - encrypt=1 + + if [[ -z $ekey ]];then + ecmd="xbcrypt --encrypt-algo=$ealgo --encrypt-key-file=$ekeyfile" + else + ecmd="xbcrypt --encrypt-algo=$ealgo --encrypt-key=$ekey" + fi + + if [[ "$WSREP_SST_OPT_ROLE" == "joiner" ]];then + ecmd+=" -d" + fi +} + +get_transfer() +{ + if [[ -z $SST_PORT ]];then + TSST_PORT=4444 + else + TSST_PORT=$SST_PORT + fi + + if [[ $tfmt == 'nc' ]];then + if [[ ! -x `which nc` ]];then + wsrep_log_error "nc(netcat) not found in path: $PATH" + exit 2 + fi + wsrep_log_info "Using netcat as streamer" + if [[ "$WSREP_SST_OPT_ROLE" == "joiner" ]];then + tcmd="nc -dl ${TSST_PORT}" + else + tcmd="nc ${REMOTEIP} ${TSST_PORT}" + fi + else + tfmt='socat' + wsrep_log_info "Using socat as streamer" + if [[ ! -x `which socat` ]];then + wsrep_log_error "socat not found in path: $PATH" + exit 2 + fi + + if [[ $encrypt -eq 2 ]] && ! socat -V | grep -q OPENSSL;then + wsrep_log_info "NOTE: socat is not openssl enabled, falling back to plain transfer" + encrypt=0 + fi + + if [[ $encrypt -eq 2 ]];then + wsrep_log_info "Using openssl based encryption with socat" + if [[ -z $tpem || -z $tcert ]];then + wsrep_log_error "Both PEM and CRT files required" + exit 22 + fi + if [[ "$WSREP_SST_OPT_ROLE" == "joiner" ]];then + wsrep_log_info "Decrypting with PEM $tpem, CA: $tcert" + tcmd="socat -u openssl-listen:${TSST_PORT},reuseaddr,cert=$tpem,cafile=${tcert}${sockopt} stdio" + else + wsrep_log_info "Encrypting with PEM $tpem, CA: $tcert" + tcmd="socat -u stdio openssl-connect:${REMOTEIP}:${TSST_PORT},cert=$tpem,cafile=${tcert}${sockopt}" + fi + else + if [[ "$WSREP_SST_OPT_ROLE" == "joiner" ]];then + tcmd="socat -u TCP-LISTEN:${TSST_PORT},reuseaddr${sockopt} stdio" + else + tcmd="socat -u stdio TCP:${REMOTEIP}:${TSST_PORT}${sockopt}" + fi + fi + fi + +} + +parse_cnf() +{ + local group=$1 + local var=$2 + reval=$(my_print_defaults -c $WSREP_SST_OPT_CONF $group | awk -F= '{if ($1 ~ /_/) { gsub(/_/,"-",$1); print $1"="$2 } else { print $0 }}' | grep -- "--$var=" | cut -d= -f2-) + if [[ -z $reval ]];then + [[ -n $3 ]] && reval=$3 + fi + echo $reval +} + +get_footprint() +{ + pushd $WSREP_SST_OPT_DATA 1>/dev/null + payload=$(du --block-size=1 -c **/*.ibd **/*.MYI **/*.MYI ibdata1 | awk 'END { print $1 }') + if my_print_defaults -c $WSREP_SST_OPT_CONF xtrabackup | grep -q -- "--compress";then + # QuickLZ has around 50% compression ratio + # When compression/compaction used, the progress is only an approximate. + payload=$(( payload*1/2 )) + fi + popd 1>/dev/null + pcmd+=" -s $payload" + adjust_progress +} + +adjust_progress() +{ + if [[ -n $progress && $progress != '1' ]];then + if [[ -e $progress ]];then + pcmd+=" 2>>$progress" + else + pcmd+=" 2>$progress" + fi + elif [[ -z $progress && -n $rlimit ]];then + # When rlimit is non-zero + pcmd="pv -q" + fi + + if [[ -n $rlimit && "$WSREP_SST_OPT_ROLE" == "donor" ]];then + wsrep_log_info "Rate-limiting SST to $rlimit" + pcmd+=" -L \$rlimit" + fi } read_cnf() { - sfmt=$(my_print_defaults -c $WSREP_SST_OPT_CONF sst | grep -- '--streamfmt' | cut -d= -f2) + sfmt=$(parse_cnf sst streamfmt "tar") + tfmt=$(parse_cnf sst transferfmt "socat") + tcert=$(parse_cnf sst tca "") + tpem=$(parse_cnf sst tcert "") + encrypt=$(parse_cnf sst encrypt 0) + sockopt=$(parse_cnf sst sockopt "") + progress=$(parse_cnf sst progress "") + rebuild=$(parse_cnf sst rebuild 0) + ttime=$(parse_cnf sst time 0) + incremental=$(parse_cnf sst incremental 0) + ealgo=$(parse_cnf xtrabackup encrypt "") + ekey=$(parse_cnf xtrabackup encrypt-key "") + ekeyfile=$(parse_cnf xtrabackup encrypt-key-file "") + + # Refer to http://www.percona.com/doc/percona-xtradb-cluster/manual/xtrabackup_sst.html + if [[ -z $ealgo ]];then + ealgo=$(parse_cnf sst encrypt-algo "") + ekey=$(parse_cnf sst encrypt-key "") + ekeyfile=$(parse_cnf sst encrypt-key-file "") + fi + rlimit=$(parse_cnf sst rlimit "") + uextra=$(parse_cnf sst use_extra 0) +} + +get_stream() +{ if [[ $sfmt == 'xbstream' ]];then wsrep_log_info "Streaming with xbstream" if [[ "$WSREP_SST_OPT_ROLE" == "joiner" ]];then - wsrep_log_info "xbstream requires manual cleanup of data directory before SST - lp:1193240" - strmcmd="xbstream -x -C ${DATA}" - elif [[ "$WSREP_SST_OPT_ROLE" == "donor" ]];then - strmcmd="xbstream -c ${INFO_FILE} ${IST_FILE}" + strmcmd="xbstream -x" else - wsrep_log_error "Invalid role: $WSREP_SST_OPT_ROLE" - exit 22 + strmcmd="xbstream -c \${INFO_FILE} \${IST_FILE}" fi else sfmt="tar" wsrep_log_info "Streaming with tar" - wsrep_log_info "Note: Advanced xtrabackup features - encryption,compression etc. not available with tar." if [[ "$WSREP_SST_OPT_ROLE" == "joiner" ]];then - wsrep_log_info "However, xbstream requires manual cleanup of data directory before SST - lp:1193240." - strmcmd="tar xfi - -C ${DATA}" - elif [[ "$WSREP_SST_OPT_ROLE" == "donor" ]];then - strmcmd="tar cf - ${INFO_FILE} ${IST_FILE}" - else - wsrep_log_error "Invalid role: $WSREP_SST_OPT_ROLE" - exit 22 + strmcmd="tar xfi - --recursive-unlink -h" + else + strmcmd="tar cf - \${INFO_FILE} \${IST_FILE}" fi fi @@ -118,6 +309,12 @@ get_proc() set -e } +sig_joiner_cleanup() +{ + wsrep_log_error "Removing $MAGIC_FILE file due to signal" + rm -f "$MAGIC_FILE" +} + cleanup_joiner() { # Since this is invoked just after exit NNN @@ -125,18 +322,14 @@ cleanup_joiner() if [[ $estatus -ne 0 ]];then wsrep_log_error "Cleanup after exit with status:$estatus" fi - local PID=$(ps -aef |grep nc| grep $NC_PORT | awk '{ print $2 }') - if [[ $estatus -ne 0 ]];then - wsrep_log_error "Killing nc pid $PID" - else - wsrep_log_info "Killing nc pid $PID" - fi - [ -n "$PID" -a "0" != "$PID" ] && kill $PID && (kill $PID && kill -9 $PID) || : - rm -f "$MAGIC_FILE" if [ "${WSREP_SST_OPT_ROLE}" = "joiner" ];then wsrep_log_info "Removing the sst_in_progress file" wsrep_cleanup_progress_file fi + if [[ -n $progress && -p $progress ]];then + wsrep_log_info "Cleaning up fifo file $progress" + rm $progress + fi } check_pid() @@ -152,119 +345,161 @@ cleanup_donor() if [[ $estatus -ne 0 ]];then wsrep_log_error "Cleanup after exit with status:$estatus" fi - local pid=$XTRABACKUP_PID - if check_pid "$pid" - then - wsrep_log_error "xtrabackup process is still running. Killing... " - kill_xtrabackup - fi - rm -f "$pid" + if [[ -n $XTRABACKUP_PID ]];then + if check_pid $XTRABACKUP_PID + then + wsrep_log_error "xtrabackup process is still running. Killing... " + kill_xtrabackup + fi + + rm -f $XTRABACKUP_PID + fi rm -f ${DATA}/${IST_FILE} + + if [[ -n $progress && -p $progress ]];then + wsrep_log_info "Cleaning up fifo file $progress" + rm $progress + fi } kill_xtrabackup() { -#set -x local PID=$(cat $XTRABACKUP_PID) [ -n "$PID" -a "0" != "$PID" ] && kill $PID && (kill $PID && kill -9 $PID) || : rm -f "$XTRABACKUP_PID" -#set +x +} + +setup_ports() +{ + if [[ "$WSREP_SST_OPT_ROLE" == "donor" ]];then + SST_PORT=$(echo $WSREP_SST_OPT_ADDR | awk -F '[:/]' '{ print $2 }') + REMOTEIP=$(echo $WSREP_SST_OPT_ADDR | awk -F ':' '{ print $1 }') + lsn=$(echo $WSREP_SST_OPT_ADDR | awk -F '[:/]' '{ print $4 }') + else + SST_PORT=$(echo ${WSREP_SST_OPT_ADDR} | awk -F ':' '{ print $2 }') + fi } # waits ~10 seconds for nc to open the port and then reports ready # (regardless of timeout) -wait_for_nc() +wait_for_listen() { local PORT=$1 local ADDR=$2 local MODULE=$3 - for i in $(seq 1 50) + for i in {1..50} do - netstat -nptl 2>/dev/null | grep '/nc\s*$' | awk '{ print $4 }' | \ - sed 's/.*://' | grep \^${PORT}\$ >/dev/null && break + ss -p state listening "( sport = :$PORT )" | grep -qE 'socat|nc' && break sleep 0.2 done + if [[ $incremental -eq 1 ]];then + echo "ready ${ADDR}/${MODULE}/$lsn" + else echo "ready ${ADDR}/${MODULE}" + fi } -INNOBACKUPEX_BIN=innobackupex -INNOBACKUPEX_ARGS="" -NC_BIN=nc - -for TOOL_BIN in INNOBACKUPEX_BIN NC_BIN ; do - if ! which ${!TOOL_BIN} > /dev/null 2>&1 - then - echo "Can't find ${!TOOL_BIN} in the path" - exit 22 # EINVAL - fi -done - -#ROLE=$1 -#ADDR=$2 -readonly AUTH=(${WSREP_SST_OPT_AUTH//:/ }) -readonly DATA="${WSREP_SST_OPT_DATA}" -#CONF=$5 +check_extra() +{ + local use_socket=1 + if [[ $uextra -eq 1 ]];then + if my_print_defaults -c $WSREP_SST_OPT_CONF mysqld | tr '_' '-' | grep -- "--thread-handling=" | grep -q 'pool-of-threads';then + local eport=$(my_print_defaults -c $WSREP_SST_OPT_CONF mysqld | tr '_' '-' | grep -- "--extra-port=" | cut -d= -f2) + if [[ -n $eport ]];then + # Xtrabackup works only locally. + # Hence, setting host to 127.0.0.1 unconditionally. + wsrep_log_info "SST through extra_port $eport" + INNOEXTRA+=" --host=127.0.0.1 --port=$eport " + use_socket=0 + else + wsrep_log_error "Extra port $eport null, failing" + exit 1 + fi + else + wsrep_log_info "Thread pool not set, ignore the option use_extra" + fi + fi + if [[ $use_socket -eq 1 ]] && [[ -n "${WSREP_SST_OPT_SOCKET}" ]];then + INNOEXTRA+=" --socket=${WSREP_SST_OPT_SOCKET}" + fi +} -INFO_FILE="xtrabackup_galera_info" -IST_FILE="xtrabackup_ist" +if [[ ! -x `which innobackupex` ]];then + wsrep_log_error "innobackupex not in path: $PATH" + exit 2 +fi -MAGIC_FILE="${DATA}/${INFO_FILE}" rm -f "${MAGIC_FILE}" +if [[ ! ${WSREP_SST_OPT_ROLE} == 'joiner' && ! ${WSREP_SST_OPT_ROLE} == 'donor' ]];then + wsrep_log_error "Invalid role ${WSREP_SST_OPT_ROLE}" + exit 22 +fi + read_cnf +setup_ports +get_stream +get_transfer + +INNOEXTRA="" +INNOAPPLY="${INNOBACKUPEX_BIN} --defaults-file=${WSREP_SST_OPT_CONF} --apply-log \$rebuildcmd \${DATA} &>\${DATA}/innobackup.prepare.log" +INNOBACKUP="${INNOBACKUPEX_BIN} --defaults-file=${WSREP_SST_OPT_CONF} \$INNOEXTRA --galera-info --stream=\$sfmt \${TMPDIR} 2>\${DATA}/innobackup.backup.log" if [ "$WSREP_SST_OPT_ROLE" = "donor" ] then trap cleanup_donor EXIT - NC_PORT=$(echo $WSREP_SST_OPT_ADDR | awk -F '[:/]' '{ print $2 }') - REMOTEIP=$(echo $WSREP_SST_OPT_ADDR | awk -F ':' '{ print $1 }') - if [ $WSREP_SST_OPT_BYPASS -eq 0 ] then - TMPDIR="/tmp" - - INNOBACKUPEX_ARGS="--galera-info --stream=$sfmt - --defaults-file=${WSREP_SST_OPT_CONF} - --socket=${WSREP_SST_OPT_SOCKET}" + TMPDIR="${TMPDIR:-/tmp}" if [ "${AUTH[0]}" != "(null)" ]; then - INNOBACKUPEX_ARGS="${INNOBACKUPEX_ARGS} --user=${AUTH[0]}" - fi + INNOEXTRA+=" --user=${AUTH[0]}" + fi if [ ${#AUTH[*]} -eq 2 ]; then - INNOBACKUPEX_ARGS="${INNOBACKUPEX_ARGS} --password=${AUTH[1]}" - else + INNOEXTRA+=" --password=${AUTH[1]}" + elif [ "${AUTH[0]}" != "(null)" ]; then # Empty password, used for testing, debugging etc. - INNOBACKUPEX_ARGS="${INNOBACKUPEX_ARGS} --password=" - fi + INNOEXTRA+=" --password=" + fi get_keys if [[ $encrypt -eq 1 ]];then if [[ -n $ekey ]];then - INNOBACKUPEX_ARGS="${INNOBACKUPEX_ARGS} --encrypt=$ealgo --encrypt-key=$ekey" + INNOEXTRA+=" --encrypt=$ealgo --encrypt-key=$ekey " else - INNOBACKUPEX_ARGS="${INNOBACKUPEX_ARGS} --encrypt=$ealgo --encrypt-key-file=$ekeyfile" + INNOEXTRA+=" --encrypt=$ealgo --encrypt-key-file=$ekeyfile " fi fi - wsrep_log_info "Streaming the backup to joiner at ${REMOTEIP} ${NC_PORT}" + if [[ -n $lsn ]];then + INNOEXTRA+=" --incremental --incremental-lsn=$lsn " + fi - set +e - ${INNOBACKUPEX_BIN} ${INNOBACKUPEX_ARGS} ${TMPDIR} \ - 2> ${DATA}/innobackup.backup.log | \ - ${NC_BIN} ${REMOTEIP} ${NC_PORT} + check_extra + + wsrep_log_info "Streaming the backup to joiner at ${REMOTEIP} ${SST_PORT}" + + if [[ -n $progress ]];then + get_footprint + tcmd="$pcmd | $tcmd" + elif [[ -n $rlimit ]];then + adjust_progress + tcmd="$pcmd | $tcmd" + fi - RC=( "${PIPESTATUS[@]}" ) + set +e + timeit "Donor-Transfer" "$INNOBACKUP | $tcmd; RC=( "\${PIPESTATUS[@]}" )" set -e if [ ${RC[0]} -ne 0 ]; then wsrep_log_error "${INNOBACKUPEX_BIN} finished with error: ${RC[0]}. " \ "Check ${DATA}/innobackup.backup.log" exit 22 - elif [ ${RC[1]} -ne 0 ]; then - wsrep_log_error "${NC_BIN} finished with error: ${RC[1]}" + elif [[ ${RC[$(( ${#RC[@]}-1 ))]} -eq 1 ]];then + wsrep_log_error "$tcmd finished with error: ${RC[1]}" exit 22 fi @@ -272,7 +507,8 @@ then XTRABACKUP_PID="${TMPDIR}/xtrabackup_pid" - else # BYPASS + else # BYPASS FOR IST + wsrep_log_info "Bypassing the SST for IST" STATE="${WSREP_SST_OPT_GTID}" echo "continue" # now server can resume updating data @@ -282,15 +518,9 @@ then pushd ${DATA} 1>/dev/null set +e if [[ $encrypt -eq 1 ]];then - if [[ -n $ekey ]];then - xbstream -c ${INFO_FILE} ${IST_FILE} | xbcrypt --encrypt-algo=$ealgo --encrypt-key=$ekey | ${NC_BIN} ${REMOTEIP} ${NC_PORT} - else - xbstream -c ${INFO_FILE} ${IST_FILE} | xbcrypt --encrypt-algo=$ealgo --encrypt-key-file=$ekeyfile | ${NC_BIN} ${REMOTEIP} ${NC_PORT} - fi - else - $strmcmd | ${NC_BIN} ${REMOTEIP} ${NC_PORT} + tcmd=" $ecmd | $tcmd" fi - RC=( "${PIPESTATUS[@]}" ) + timeit "Donor-IST-Unencrypted-transfer" "$strmcmd | $tcmd; RC=( "\${PIPESTATUS[@]}" )" set -e popd 1>/dev/null @@ -301,69 +531,86 @@ then exit 1 fi done - #rm -f ${DATA}/${IST_FILE} fi echo "done ${WSREP_SST_OPT_GTID}" + wsrep_log_info "Total time on donor: $totime seconds" elif [ "${WSREP_SST_OPT_ROLE}" = "joiner" ] then [[ -e $SST_PROGRESS_FILE ]] && wsrep_log_info "Stale sst_in_progress file: $SST_PROGRESS_FILE" touch $SST_PROGRESS_FILE + if [[ ! -e ${DATA}/ibdata1 ]];then + incremental=0 + fi + + if [[ $incremental -eq 1 ]];then + wsrep_log_info "Incremental SST enabled" + #lsn=$(/pxc/bin/mysqld --defaults-file=$WSREP_SST_OPT_CONF --basedir=/pxc --wsrep-recover 2>&1 | grep -o 'log sequence number .*' | cut -d " " -f 4 | head -1) + lsn=$(grep to_lsn xtrabackup_checkpoints | cut -d= -f2 | tr -d ' ') + wsrep_log_info "Recovered LSN: $lsn" + fi + sencrypted=1 nthreads=1 MODULE="xtrabackup_sst" - rm -f ${DATA}/xtrabackup_* + # May need xtrabackup_checkpoints later on + rm -f ${DATA}/xtrabackup_binary ${DATA}/xtrabackup_galera_info ${DATA}/xtrabackup_logfile ADDR=${WSREP_SST_OPT_ADDR} - NC_PORT=$(echo ${ADDR} | awk -F ':' '{ print $2 }') - if [ -z "${NC_PORT}" ] + if [ -z "${SST_PORT}" ] then - NC_PORT=4444 - ADDR="$(echo ${ADDR} | awk -F ':' '{ print $1 }'):${NC_PORT}" + SST_PORT=4444 + ADDR="$(echo ${WSREP_SST_OPT_ADDR} | awk -F ':' '{ print $1 }'):${SST_PORT}" fi - wait_for_nc ${NC_PORT} ${ADDR} ${MODULE} & + wait_for_listen ${SST_PORT} ${ADDR} ${MODULE} & - trap "exit 32" HUP PIPE - trap "exit 3" INT TERM + trap sig_joiner_cleanup HUP PIPE INT TERM trap cleanup_joiner EXIT + if [[ -n $progress ]];then + adjust_progress + tcmd+=" | $pcmd" + fi + + if [[ $incremental -eq 1 ]];then + BDATA=$DATA + DATA=$(mktemp -d) + MAGIC_FILE="${DATA}/${INFO_FILE}" + fi + get_keys set +e if [[ $encrypt -eq 1 && $sencrypted -eq 1 ]];then - if [[ -n $ekey ]];then - ${NC_BIN} -dl ${NC_PORT} | xbcrypt -d --encrypt-algo=$ealgo --encrypt-key=$ekey | xbstream -x -C ${DATA} - else - ${NC_BIN} -dl ${NC_PORT} | xbcrypt -d --encrypt-algo=$ealgo --encrypt-key-file=$ekeyfile | xbstream -x -C ${DATA} - fi - else - ${NC_BIN} -dl ${NC_PORT} | $strmcmd + strmcmd=" $ecmd | $strmcmd" fi - RC=( "${PIPESTATUS[@]}" ) + + pushd ${DATA} 1>/dev/null + timeit "Joiner-Recv-Unencrypted" "$tcmd | $strmcmd; RC=( "\${PIPESTATUS[@]}" )" + popd 1>/dev/null + set -e if [[ $sfmt == 'xbstream' ]];then # Special handling till lp:1193240 is fixed" if [[ ${RC[$(( ${#RC[@]}-1 ))]} -eq 1 ]];then wsrep_log_error "Xbstream failed" - wsrep_log_error "Data directory ${DATA} needs to be empty for SST: lp:1193240" \ + wsrep_log_error "Data directory ${DATA} may not be empty: lp:1193240" \ "Manual intervention required in that case" exit 32 fi fi - wait %% # join wait_for_nc thread + wait %% # join for wait_for_listen thread for ecode in "${RC[@]}";do if [[ $ecode -ne 0 ]];then wsrep_log_error "Error while getting data from donor node: " \ "exit codes: ${RC[@]}" - wsrep_log_error "Data directory ${DATA} needs to be empty for SST:" \ - "Manual intervention required in that case" exit 32 fi done @@ -372,43 +619,25 @@ then then # this message should cause joiner to abort wsrep_log_error "xtrabackup process ended without creating '${MAGIC_FILE}'" + wsrep_log_info "Contents of datadir" + wsrep_log_info "$(ls -l ${DATA}/**/*)" exit 32 fi - if ! ps -p ${WSREP_SST_OPT_PARENT} >/dev/null + if ! ps -p ${WSREP_SST_OPT_PARENT} &>/dev/null then wsrep_log_error "Parent mysqld process (PID:${WSREP_SST_OPT_PARENT}) terminated unexpectedly." exit 32 fi - if [ ! -r "${IST_FILE}" ] + if [ ! -r "${DATA}/${IST_FILE}" ] then wsrep_log_info "Proceeding with SST" - rebuild="" wsrep_log_info "Removing existing ib_logfile files" - rm -f ${DATA}/ib_logfile* - - # Decrypt only if not encrypted in stream. - # NOT USED NOW. - # Till https://blueprints.launchpad.net/percona-xtrabackup/+spec/add-support-for-rsync-url - # is implemented - #get_keys - if [[ $encrypt -eq 1 && $sencrypted -eq 0 ]];then - # Decrypt the files if any - find ${DATA} -type f -name '*.xbcrypt' -printf '%p\n' | while read line;do - input=$line - output=${input%.xbcrypt} - - if [[ -n $ekey ]];then - xbcrypt -d --encrypt-algo=$ealgo --encrypt-key=$ekey -i $input > $output - else - xbcrypt -d --encrypt-algo=$ealgo --encrypt-key-file=$ekeyfile -i $input > $output - fi - done - - if [[ $? = 0 ]];then - find ${DATA} -type f -name '*.xbcrypt' -delete - fi + if [[ $incremental -ne 1 ]];then + rm -f ${DATA}/ib_logfile* + else + rm -f ${BDATA}/ib_logfile* fi get_proc @@ -416,33 +645,47 @@ then # Rebuild indexes for compact backups if grep -q 'compact = 1' ${DATA}/xtrabackup_checkpoints;then wsrep_log_info "Index compaction detected" - nthreads=$(my_print_defaults -c $WSREP_SST_OPT_CONF xtrabackup | grep -- '--rebuild-threads' | cut -d= -f2) - [[ -z $nthreads ]] && nthreads=$nproc - wsrep_log_info "Rebuilding with $nthreads threads" - rebuild="--rebuild-indexes --rebuild-threads=$nthreads" + rebuild=1 fi - if test -n "$(find ${DATA} -maxdepth 1 -name '*.qp' -print -quit)";then + if [[ $rebuild -eq 1 ]];then + nthreads=$(parse_cnf xtrabackup rebuild-threads $nproc) + wsrep_log_info "Rebuilding during prepare with $nthreads threads" + rebuildcmd="--rebuild-indexes --rebuild-threads=$nthreads" + fi + + if test -n "$(find ${DATA} -maxdepth 1 -type f -name '*.qp' -print -quit)";then wsrep_log_info "Compressed qpress files found" if [[ ! -x `which qpress` ]];then - wsrep_log_error "qpress not found in PATH" + wsrep_log_error "qpress not found in path: $PATH" exit 22 fi - set +e + if [[ -n $progress ]] && pv --help | grep -q 'line-mode';then + count=$(find ${DATA} -type f -name '*.qp' | wc -l) + count=$(( count*2 )) + if pv --help | grep -q FORMAT;then + pvopts="-f -s $count -l -N Decompression -F '%N => Rate:%r Elapsed:%t %e Progress: [%b/$count]'" + else + pvopts="-f -s $count -l -N Decompression" + fi + pcmd="pv $pvopts" + adjust_progress + dcmd="$pcmd | xargs -n 2 qpress -T${nproc}d" + else + dcmd="xargs -n 2 qpress -T${nproc}d" + fi wsrep_log_info "Removing existing ibdata1 file" rm -f ${DATA}/ibdata1 # Decompress the qpress files wsrep_log_info "Decompression with $nproc threads" - find ${DATA} -type f -name '*.qp' -printf '%p\n%h\n' | xargs -P $nproc -n 2 qpress -d + timeit "Decompression" "find ${DATA} -type f -name '*.qp' -printf '%p\n%h\n' | $dcmd" extcode=$? - set -e - if [[ $extcode -eq 0 ]];then wsrep_log_info "Removing qpress files after decompression" find ${DATA} -type f -name '*.qp' -delete @@ -455,10 +698,21 @@ then fi fi + if [[ $incremental -eq 1 ]];then + # Added --ibbackup=xtrabackup_55 because it fails otherwise citing connection issues. + INNOAPPLY="${INNOBACKUPEX_BIN} --defaults-file=${WSREP_SST_OPT_CONF} \ + --ibbackup=xtrabackup_55 --apply-log $rebuildcmd --redo-only $BDATA --incremental-dir=${DATA} &>>${BDATA}/innobackup.prepare.log" + fi + wsrep_log_info "Preparing the backup at ${DATA}" + timeit "Xtrabackup prepare stage" "$INNOAPPLY" + + if [[ $incremental -eq 1 ]];then + wsrep_log_info "Cleaning up ${DATA} after incremental SST" + [[ -d ${DATA} ]] && rm -rf ${DATA} + DATA=$BDATA + fi - ${INNOBACKUPEX_BIN} --defaults-file=${WSREP_SST_OPT_CONF} --apply-log $rebuild \ - ${DATA} 1>&2 2> ${DATA}/innobackup.prepare.log if [ $? -ne 0 ]; then wsrep_log_error "${INNOBACKUPEX_BIN} finished with errors. Check ${DATA}/innobackup.prepare.log" @@ -468,14 +722,13 @@ then wsrep_log_info "${IST_FILE} received from donor: Running IST" fi - cat "${MAGIC_FILE}" # output UUID:seqno - - #Cleanup not required here since EXIT trap should be called - #wsrep_cleanup_progress_file + if [[ ! -r ${MAGIC_FILE} ]];then + wsrep_log_error "SST magic file ${MAGIC_FILE} not found/readable" + exit 2 + fi -else - wsrep_log_error "Unrecognized role: ${WSREP_SST_OPT_ROLE}" - exit 22 # EINVAL + cat "${MAGIC_FILE}" # output UUID:seqno + wsrep_log_info "Total time on joiner: $totime seconds" fi exit 0 diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc index 6f8d4b4af14..5c11d18f3f3 100644 --- a/sql/ha_partition.cc +++ b/sql/ha_partition.cc @@ -385,7 +385,13 @@ const char *ha_partition::table_type() const // we can do this since we only support a single engine type return m_file[0]->table_type(); } - +#ifdef WITH_WSREP +int ha_partition::wsrep_db_type() const +{ + // we can do this since we only support a single engine type + return ha_legacy_type(m_file[0]->ht); +} +#endif /* WITH_WSREP */ /* Destructor method diff --git a/sql/ha_partition.h b/sql/ha_partition.h index 05c2bc7072f..75018eca334 100644 --- a/sql/ha_partition.h +++ b/sql/ha_partition.h @@ -1265,6 +1265,7 @@ public: return h; } #ifdef WITH_WSREP + virtual int wsrep_db_type() const; void wsrep_reset_files() { for (uint i=0; i < m_tot_parts; i++) diff --git a/sql/handler.cc b/sql/handler.cc index 40d53170557..9144ab8e92a 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -1148,10 +1148,27 @@ int ha_prepare(THD *thd) { if ((err= ht->prepare(ht, thd, all))) { +#ifdef WITH_WSREP + if (WSREP(thd) && ht->db_type== DB_TYPE_WSREP) + { + error= 1; + /* avoid sending error, if we need to replay */ + if (thd->wsrep_conflict_state!= MUST_REPLAY) + { + my_error(ER_LOCK_DEADLOCK, MYF(0), err); + } + } + else + { + /* not wsrep hton, bail to native mysql behavior */ +#endif my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); ha_rollback_trans(thd, all); error=1; break; +#ifdef WITH_WSREP + } +#endif } } else @@ -1407,16 +1424,25 @@ int ha_commit_trans(THD *thd, bool all) if (WSREP(thd) && ht->db_type== DB_TYPE_WSREP) { error= 1; - /* avoid sending error, if we need to replay */ - if (thd->wsrep_conflict_state!= MUST_REPLAY) + switch (err) { - my_error(ER_LOCK_DEADLOCK, MYF(0), err); + case WSREP_TRX_SIZE_EXCEEDED: + /* give user size exeeded erro from wsrep_api.h */ + my_error(ER_ERROR_DURING_COMMIT, MYF(0), WSREP_SIZE_EXCEEDED); + break; + case WSREP_TRX_CERT_FAIL: + case WSREP_TRX_ERROR: + /* avoid sending error, if we need to replay */ + if (thd->wsrep_conflict_state!= MUST_REPLAY) + { + my_error(ER_LOCK_DEADLOCK, MYF(0), err); + } } } - else - /* not wsrep hton, bail to native mysql behavior */ + else + /* not wsrep hton, bail to native mysql behavior */ #endif /* WITH_WSREP */ - my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); + my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); #ifdef WITH_WSREP } #endif /* WITH_WSREP */ @@ -5699,7 +5725,9 @@ static bool check_table_binlog_row_based(THD *thd, TABLE *table) table->s->cached_row_logging_check && (thd->variables.option_bits & OPTION_BIN_LOG) && #ifdef WITH_WSREP - ((WSREP(thd) && wsrep_emulate_bin_log) || mysql_bin_log.is_open())); + /* applier and replayer should not binlog */ + ((WSREP_EMULATE_BINLOG(thd) && (thd->wsrep_exec_mode != REPL_RECV)) || + mysql_bin_log.is_open())); #else mysql_bin_log.is_open()); #endif @@ -5802,6 +5830,17 @@ static int binlog_log_row(TABLE* table, bool error= 0; THD *const thd= table->in_use; +#ifdef WITH_WSREP + /* only InnoDB tables will be replicated through binlog emulation */ + if (WSREP_EMULATE_BINLOG(thd) && + table->file->ht->db_type != DB_TYPE_INNODB && + !(table->file->ht->db_type == DB_TYPE_PARTITION_DB && + (((ha_partition*)(table->file))->wsrep_db_type() == DB_TYPE_INNODB))) + // !strcmp(table->file->table_type(), "InnoDB")) + { + return 0; + } +#endif /* WITH_WSREP */ if (check_table_binlog_row_based(thd, table)) { MY_BITMAP cols; diff --git a/sql/log.cc b/sql/log.cc index a0382d9ad1f..7dcd1548fbb 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -568,7 +568,7 @@ void thd_binlog_trx_reset(THD * thd) binlog_cache_mngr *const cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); if (cache_mngr) cache_mngr->reset(false, true); - } + } thd->clear_binlog_table_maps(); } diff --git a/sql/log_event.cc b/sql/log_event.cc index 9a792ad82a0..9e9f2dd25f8 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -3096,7 +3096,12 @@ Query_log_event::Query_log_event(THD* thd_arg, const char* query_arg, { time_t end_time; #ifdef WITH_WSREP - thd->wsrep_PA_safe= false; + /* + If Query_log_event will contain non trans keyword (not BEGIN, COMMIT, + SAVEPOINT or ROLLBACK) we disable PA for this transaction. + */ + if (!is_trans_keyword()) + thd->wsrep_PA_safe= false; #endif /* WITH_WSREP */ memset(&user, 0, sizeof(user)); memset(&host, 0, sizeof(host)); diff --git a/sql/mdl.cc b/sql/mdl.cc index 7ef105b9269..ff53793554a 100644 --- a/sql/mdl.cc +++ b/sql/mdl.cc @@ -22,6 +22,7 @@ #include <mysql/plugin.h> #include <mysql/service_thd_wait.h> #include <mysql/psi/mysql_stage.h> + #ifdef WITH_WSREP #include "wsrep_mysqld.h" #include "wsrep_thd.h" @@ -34,7 +35,6 @@ extern bool wsrep_grant_mdl_exception(MDL_context *requestor_ctx, MDL_ticket *ticket); #endif /* WITH_WSREP */ - #ifdef HAVE_PSI_INTERFACE static PSI_mutex_key key_MDL_map_mutex; static PSI_mutex_key key_MDL_wait_LOCK_wait_status; @@ -1480,7 +1480,7 @@ void MDL_lock::Ticket_list::add_ticket(MDL_ticket *ticket) DBUG_ASSERT(ticket->get_lock()); #ifdef WITH_WSREP if ((this == &(ticket->get_lock()->m_waiting)) && - wsrep_thd_is_brute_force((void *)(ticket->get_ctx()->wsrep_get_thd()))) + wsrep_thd_is_BF((void *)(ticket->get_ctx()->wsrep_get_thd()), false)) { Ticket_iterator itw(ticket->get_lock()->m_waiting); Ticket_iterator itg(ticket->get_lock()->m_granted); @@ -1491,7 +1491,7 @@ void MDL_lock::Ticket_list::add_ticket(MDL_ticket *ticket) while ((waiting= itw++) && !added) { - if (!wsrep_thd_is_brute_force((void *)(waiting->get_ctx()->wsrep_get_thd()))) + if (!wsrep_thd_is_BF((void *)(waiting->get_ctx()->wsrep_get_thd()), true)) { WSREP_DEBUG("MDL add_ticket inserted before: %lu %s", wsrep_thd_thread_id(waiting->get_ctx()->wsrep_get_thd()), @@ -1892,7 +1892,7 @@ MDL_lock::can_grant_lock(enum_mdl_type type_arg, ticket->is_incompatible_when_granted(type_arg)) #ifdef WITH_WSREP { - if (wsrep_thd_is_brute_force((void *)(requestor_ctx->wsrep_get_thd())) && + if (wsrep_thd_is_BF((void *)(requestor_ctx->wsrep_get_thd()),false) && key.mdl_namespace() == MDL_key::GLOBAL) { WSREP_DEBUG("global lock granted for BF: %lu %s", @@ -1933,7 +1933,7 @@ MDL_lock::can_grant_lock(enum_mdl_type type_arg, #ifdef WITH_WSREP else { - if (wsrep_thd_is_brute_force((void *)(requestor_ctx->wsrep_get_thd())) && + if (wsrep_thd_is_BF((void *)(requestor_ctx->wsrep_get_thd()), false) && key.mdl_namespace() == MDL_key::GLOBAL) { WSREP_DEBUG("global lock granted for BF (waiting queue): %lu %s", diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 0997d30d88e..a0d7b346854 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -76,7 +76,6 @@ #include "wsrep_var.h" #include "wsrep_thd.h" #include "wsrep_sst.h" -ulong wsrep_running_threads = 0; // # of currently running wsrep threads #endif #include "sql_callback.h" #include "threadpool.h" @@ -2747,11 +2746,7 @@ static void network_init(void) @note For the connection that is doing shutdown, this is called twice */ -#ifdef WITH_WSREP -void close_connection(THD *thd, uint sql_errno, bool lock) -#else void close_connection(THD *thd, uint sql_errno) -#endif { DBUG_ENTER("close_connection"); @@ -2984,7 +2979,11 @@ bool one_thread_per_connection_end(THD *thd, bool put_in_cache) unlink_thd(thd); /* Mark that current_thd is not valid anymore */ set_current_thd(0); +#ifdef WITH_WSREP + if (put_in_cache && cache_thread() && !thd->wsrep_applier) +#else if (put_in_cache && cache_thread()) +#endif /* WITH_WSREP */ DBUG_RETURN(0); // Thread is reused /* @@ -3428,10 +3427,21 @@ pthread_handler_t signal_hand(void *arg __attribute__((unused))) should not be any other mysql_cond_signal() calls. */ mysql_mutex_lock(&LOCK_thread_count); - mysql_mutex_unlock(&LOCK_thread_count); mysql_cond_broadcast(&COND_thread_count); + mysql_mutex_unlock(&LOCK_thread_count); + + /* + Waiting for until mysqld_server_started != 0 + to ensure that all server components has been successfully + initialized. This step is mandatory since signal processing + could be done safely only when all server components + has been initialized. + */ + mysql_mutex_lock(&LOCK_server_started); + while (!mysqld_server_started) + mysql_cond_wait(&COND_server_started, &LOCK_server_started); + mysql_mutex_unlock(&LOCK_server_started); - (void) pthread_sigmask(SIG_BLOCK,&set,NULL); for (;;) { int error; // Used when debugging @@ -4879,42 +4889,8 @@ will be ignored as the --log-bin option is not defined."); } #endif -#ifdef WITH_WSREP /* WSREP BEFORE SE */ - if (!wsrep_recovery) - { - if (opt_bootstrap) // bootsrap option given - disable wsrep functionality - { - wsrep_provider_init(WSREP_NONE); - if (wsrep_init()) unireg_abort(1); - } - else // full wsrep initialization - { - // add basedir/bin to PATH to resolve wsrep script names - char* const tmp_path((char*)alloca(strlen(mysql_home) + - strlen("/bin") + 1)); - if (tmp_path) - { - strcpy(tmp_path, mysql_home); - strcat(tmp_path, "/bin"); - wsrep_prepend_PATH(tmp_path); - } - else - { - WSREP_ERROR("Could not append %s/bin to PATH", mysql_home); - } DBUG_ASSERT(!opt_bin_log || opt_bin_logname); - if (wsrep_before_SE()) - { -#ifndef EMBEDDED_LIBRARY - set_ports(); // this is also called in network_init() later but we need - // to know mysqld_port now - lp:1071882 -#endif /* !EMBEDDED_LIBRARY */ - wsrep_init_startup(true); - } - } - } -#endif /* WITH_WSREP */ if (opt_bin_log) { /* Reports an error and aborts, if the --log-bin's path @@ -4962,10 +4938,67 @@ a file name for --log-bin-index option", opt_binlog_index_name); { opt_bin_logname= my_once_strdup(buf, MYF(MY_WME)); } +#ifdef WITH_WSREP /* WSREP BEFORE SE */ + /* + Wsrep initialization must happen at this point, because: + - opt_bin_logname must be known when starting replication + since SST may need it + - SST may modify binlog index file, so it must be opened + after SST has happened + */ + } + if (!wsrep_recovery) + { + if (opt_bootstrap) // bootsrap option given - disable wsrep functionality + { + wsrep_provider_init(WSREP_NONE); + if (wsrep_init()) unireg_abort(1); + } + else // full wsrep initialization + { + // add basedir/bin to PATH to resolve wsrep script names + char* const tmp_path((char*)alloca(strlen(mysql_home) + + strlen("/bin") + 1)); + if (tmp_path) + { + strcpy(tmp_path, mysql_home); + strcat(tmp_path, "/bin"); + wsrep_prepend_PATH(tmp_path); + } + else + { + WSREP_ERROR("Could not append %s/bin to PATH", mysql_home); + } + + if (wsrep_before_SE()) + { + set_ports(); // this is also called in network_init() later but we need + // to know mysqld_port now - lp:1071882 + wsrep_init_startup(true); + } + } + } + if (opt_bin_log) + { + /* + Variable ln is not defined at this scope. We use opt_bin_logname instead. + It should be the same as ln since + - mysql_bin_log.generate_name() returns first argument if new log name + is not generated + - if new log name is generated, return value is assigned to ln and copied + to opt_bin_logname above + */ + if (mysql_bin_log.open_index_file(opt_binlog_index_name, opt_bin_logname, + TRUE)) + { + unireg_abort(1); + } +#else if (mysql_bin_log.open_index_file(opt_binlog_index_name, ln, TRUE)) { unireg_abort(1); } +#endif /* WITH_WSREP */ } /* call ha_init_key_cache() on all key caches to init them */ @@ -5262,7 +5295,7 @@ pthread_handler_t start_wsrep_THD(void *arg) thd->thr_create_utime= microsecond_interval_timer(); if (MYSQL_CALLBACK_ELSE(thread_scheduler, init_new_connection_thread, (), 0)) { - close_connection(thd, ER_OUT_OF_RESOURCES, 1); + close_connection(thd, ER_OUT_OF_RESOURCES); statistic_increment(aborted_connects,&LOCK_status); MYSQL_CALLBACK(thread_scheduler, end_thread, (thd, 0)); @@ -5285,7 +5318,7 @@ pthread_handler_t start_wsrep_THD(void *arg) thd->thread_stack= (char*) &thd; if (thd->store_globals()) { - close_connection(thd, ER_OUT_OF_RESOURCES, 1); + close_connection(thd, ER_OUT_OF_RESOURCES); statistic_increment(aborted_connects,&LOCK_status); MYSQL_CALLBACK(thread_scheduler, end_thread, (thd, 0)); delete thd; @@ -5307,19 +5340,9 @@ pthread_handler_t start_wsrep_THD(void *arg) ++connection_count; mysql_mutex_unlock(&LOCK_connection_count); - mysql_mutex_lock(&LOCK_thread_count); - wsrep_running_threads++; - mysql_cond_signal(&COND_thread_count); - mysql_mutex_unlock(&LOCK_thread_count); - processor(thd); - close_connection(thd, 0, 1); - - mysql_mutex_lock(&LOCK_thread_count); - wsrep_running_threads--; - mysql_cond_signal(&COND_thread_count); - mysql_mutex_unlock(&LOCK_thread_count); + close_connection(thd, 0); // Note: We can't call THD destructor without crashing // if plugins have not been initialized. However, in most of the @@ -5337,6 +5360,7 @@ pthread_handler_t start_wsrep_THD(void *arg) // at server shutdown } + my_thread_end(); if (thread_handling > SCHEDULER_ONE_THREAD_PER_CONNECTION) { mysql_mutex_lock(&LOCK_thread_count); @@ -5550,7 +5574,7 @@ void wsrep_close_client_connections(my_bool wait_to_end) !is_replaying_connection(tmp)) { WSREP_INFO("killing local connection: %ld",tmp->thread_id); - close_connection(tmp,0,0); + close_connection(tmp,0); } #endif } @@ -6644,11 +6668,7 @@ void create_thread_to_handle_connection(THD *thd) my_snprintf(error_message_buff, sizeof(error_message_buff), ER_THD(thd, ER_CANT_CREATE_THREAD), error); net_send_error(thd, ER_CANT_CREATE_THREAD, error_message_buff, NULL); -#ifdef WITH_WSREP - close_connection(thd, ER_OUT_OF_RESOURCES ,0); -#else close_connection(thd, ER_OUT_OF_RESOURCES); -#endif /* WITH_WSREP */ mysql_mutex_lock(&LOCK_thread_count); thd->unlink(); @@ -6694,11 +6714,7 @@ static void create_new_thread(THD *thd) mysql_mutex_unlock(&LOCK_connection_count); DBUG_PRINT("error",("Too many connections")); -#ifdef WITH_WSREP - close_connection(thd, ER_CON_COUNT_ERROR, 1); -#else close_connection(thd, ER_CON_COUNT_ERROR); -#endif /* WITH_WSREP */ statistic_increment(denied_connections, &LOCK_status); delete thd; statistic_increment(connection_errors_max_connection, &LOCK_status); @@ -7109,11 +7125,7 @@ pthread_handler_t handle_connections_namedpipes(void *arg) if (!(thd->net.vio= vio_new_win32pipe(hConnectedPipe)) || my_net_init(&thd->net, thd->net.vio, MYF(MY_THREAD_SPECIFIC))) { -#ifdef WITH_WSREP - close_connection(thd, ER_OUT_OF_RESOURCES, 1); -#else close_connection(thd, ER_OUT_OF_RESOURCES); -#endif delete thd; set_current_thd(0); continue; @@ -7311,11 +7323,7 @@ pthread_handler_t handle_connections_shared_memory(void *arg) event_conn_closed)) || my_net_init(&thd->net, thd->net.vio, MYF(MY_THREAD_SPECIFIC))) { -#ifdef WITH_WSREP - close_connection(thd, ER_OUT_OF_RESOURCES, 1); -#else close_connection(thd, ER_OUT_OF_RESOURCES); -#endif errmsg= 0; goto errorconn; } diff --git a/sql/mysqld.h b/sql/mysqld.h index c443d4eed62..1a1183d1819 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -55,11 +55,7 @@ typedef Bitmap<((MAX_INDEXES+7)/8*8)> key_map; /* Used for finding keys */ some places */ /* Function prototypes */ void kill_mysql(void); -#ifdef WITH_WSREP -void close_connection(THD *thd, uint sql_errno= 0, bool lock=1); -#else void close_connection(THD *thd, uint sql_errno= 0); -#endif /* WITH_WSREP */ void handle_connection_in_main_thread(THD *thd); void create_thread_to_handle_connection(THD *thd); void delete_running_thd(THD *thd); diff --git a/sql/slave.cc b/sql/slave.cc index ecfd048fd26..ecdc6c5d32f 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -4651,6 +4651,22 @@ err_during_init: DBUG_EXECUTE_IF("simulate_slave_delay_at_terminate_bug38694", sleep(5);); mysql_mutex_unlock(&rli->run_lock); // tell the world we are done +#ifdef WITH_WSREP + /* if slave stopped due to node going non primary, we set global flag to + trigger automatic restart of slave when node joins back to cluster + */ + if (WSREP_ON && !wsrep_ready) + { + WSREP_INFO("Slave thread stopped because node dropped from cluster"); + if (wsrep_restart_slave) + { + WSREP_INFO("wsrep_restart_slave was set and therefore slave will be " + "automatically restarted when node joins back to cluster"); + wsrep_restart_slave_activated= TRUE; + } + } +#endif /* WITH_WSREP */ + DBUG_LEAVE; // Must match DBUG_ENTER() my_thread_end(); #ifdef HAVE_OPENSSL diff --git a/sql/sql_base.cc b/sql/sql_base.cc index f1479d47fcd..986d2b77288 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -8945,7 +8945,7 @@ bool mysql_notify_thread_having_shared_lock(THD *thd, THD *in_use, #ifdef WITH_WSREP { signalled|= mysql_lock_abort_for_thread(thd, thd_table); - if (thd && WSREP(thd) && wsrep_thd_is_brute_force((void *)thd)) + if (thd && WSREP(thd) && wsrep_thd_is_BF((void *)thd, true)) { WSREP_DEBUG("remove_table_from_cache: %llu", (unsigned long long) thd->real_id); diff --git a/sql/sql_class.cc b/sql/sql_class.cc index d979df6dc33..7b48d86603e 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -66,6 +66,7 @@ #include "lock.h" #ifdef WITH_WSREP #include "wsrep_mysqld.h" +#include "wsrep_thd.h" #endif #include "sql_connect.h" @@ -958,6 +959,10 @@ extern "C" void wsrep_thd_awake(THD *thd, my_bool signal) mysql_mutex_unlock(&LOCK_wsrep_replaying); } } +extern "C" int wsrep_thd_retry_counter(THD *thd) +{ + return(thd->wsrep_retry_counter); +} extern int wsrep_trx_order_before(void *thd1, void *thd2) @@ -2129,7 +2134,19 @@ bool THD::notify_shared_lock(MDL_context_owner *ctx_in_use, (e.g. see partitioning code). */ if (!thd_table->needs_reopen()) +#ifdef WITH_WSREP + { + signalled|= mysql_lock_abort_for_thread(this, thd_table); + if (this && WSREP(this) && wsrep_thd_is_BF((void *)this, FALSE)) + { + WSREP_DEBUG("remove_table_from_cache: %llu", + (unsigned long long) this->real_id); + wsrep_abort_thd((void *)this, (void *)in_use, FALSE); + } + } +#else signalled|= mysql_lock_abort_for_thread(this, thd_table); +#endif } mysql_mutex_unlock(&in_use->LOCK_thd_data); } diff --git a/sql/sql_connect.cc b/sql/sql_connect.cc index 5acc8b1e473..49d9ae5a76f 100644 --- a/sql/sql_connect.cc +++ b/sql/sql_connect.cc @@ -1102,11 +1102,7 @@ bool setup_connection_thread_globals(THD *thd) { if (thd->store_globals()) { -#ifdef WITH_WSREP - close_connection(thd, ER_OUT_OF_RESOURCES, 1); -#else close_connection(thd, ER_OUT_OF_RESOURCES); -#endif statistic_increment(aborted_connects,&LOCK_status); MYSQL_CALLBACK(thd->scheduler, end_thread, (thd, 0)); return 1; // Error @@ -1351,11 +1347,7 @@ void do_handle_one_connection(THD *thd_arg) if (MYSQL_CALLBACK_ELSE(thd->scheduler, init_new_connection_thread, (), 0)) { -#ifdef WITH_WSREP - close_connection(thd, ER_OUT_OF_RESOURCES, 1); -#else close_connection(thd, ER_OUT_OF_RESOURCES); -#endif statistic_increment(aborted_connects,&LOCK_status); MYSQL_CALLBACK(thd->scheduler, end_thread, (thd, 0)); return; @@ -1413,13 +1405,9 @@ void do_handle_one_connection(THD *thd_arg) thd->wsrep_query_state= QUERY_EXITING; mysql_mutex_unlock(&thd->LOCK_wsrep_thd); } -#endif +#endif end_thread: -#ifdef WITH_WSREP - close_connection(thd, 0, 1); -#else close_connection(thd); -#endif if (thd->userstat_running) update_global_user_stats(thd, create_user, time(NULL)); diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 04b20a7d40f..04cdcd10c56 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -820,11 +820,7 @@ void do_handle_bootstrap(THD *thd) if (my_thread_init() || thd->store_globals()) { #ifndef EMBEDDED_LIBRARY -#ifdef WITH_WSREP - close_connection(thd, ER_OUT_OF_RESOURCES, 1); -#else close_connection(thd, ER_OUT_OF_RESOURCES); -#endif /* WITH_WSREP */ #endif thd->fatal_error(); goto end; @@ -2736,7 +2732,9 @@ mysql_execute_command(THD *thd) if (trans_commit_implicit(thd)) { thd->mdl_context.release_transactional_locks(); +#ifdef WITH_WSREP WSREP_DEBUG("implicit commit failed, MDL released: %lu", thd->thread_id); +#endif /* WITH_WSREP */ goto error; } /* Release metadata locks acquired in this transaction. */ @@ -4758,7 +4756,9 @@ end_with_restore_list: if (trans_begin(thd, lex->start_transaction_opt)) { thd->mdl_context.release_transactional_locks(); +#ifdef WITH_WSREP WSREP_DEBUG("BEGIN failed, MDL released: %lu", thd->thread_id); +#endif /* WITH_WSREP */ goto error; } my_ok(thd); @@ -4776,7 +4776,9 @@ end_with_restore_list: if (trans_commit(thd)) { thd->mdl_context.release_transactional_locks(); +#ifdef WITH_WSREP WSREP_DEBUG("COMMIT failed, MDL released: %lu", thd->thread_id); +#endif /* WITH_WSREP */ goto error; } thd->mdl_context.release_transactional_locks(); @@ -4802,9 +4804,9 @@ end_with_restore_list: if (WSREP(thd)) { if (thd->wsrep_conflict_state == NO_CONFLICT || - thd->wsrep_conflict_state == REPLAYING) + thd->wsrep_conflict_state == REPLAYING) { - my_ok(thd); + my_ok(thd); } } else { #endif /* WITH_WSREP */ @@ -4828,7 +4830,9 @@ end_with_restore_list: if (trans_rollback(thd)) { thd->mdl_context.release_transactional_locks(); +#ifdef WITH_WSREP WSREP_DEBUG("rollback failed, MDL released: %lu", thd->thread_id); +#endif /* WITH_WSREP */ goto error; } thd->mdl_context.release_transactional_locks(); @@ -4850,7 +4854,7 @@ end_with_restore_list: #ifdef WITH_WSREP if (WSREP(thd)) { if (thd->wsrep_conflict_state == NO_CONFLICT) { - my_ok(thd); + my_ok(thd); } } else { #endif /* WITH_WSREP */ @@ -5375,7 +5379,9 @@ create_sp_error: if (trans_xa_commit(thd)) { thd->mdl_context.release_transactional_locks(); +#ifdef WITH_WSREP WSREP_DEBUG("XA commit failed, MDL released: %lu", thd->thread_id); +#endif /* WITH_WSREP */ goto error; } thd->mdl_context.release_transactional_locks(); @@ -5391,7 +5397,9 @@ create_sp_error: if (trans_xa_rollback(thd)) { thd->mdl_context.release_transactional_locks(); +#ifdef WITH_WSREP WSREP_DEBUG("XA rollback failed, MDL released: %lu", thd->thread_id); +#endif /* WITH_WSREP */ goto error; } thd->mdl_context.release_transactional_locks(); @@ -7930,7 +7938,7 @@ kill_one_thread(THD *thd, longlong id, killed_state kill_signal, killed_type typ #ifdef WITH_WSREP if (((thd->security_ctx->master_access & SUPER_ACL) || thd->security_ctx->user_matches(tmp->security_ctx)) && - !wsrep_thd_is_brute_force((void *)tmp)) + !wsrep_thd_is_BF((void *)tmp, true)) #else if ((thd->security_ctx->master_access & SUPER_ACL) || thd->security_ctx->user_matches(tmp->security_ctx)) diff --git a/sql/sql_partition_admin.cc b/sql/sql_partition_admin.cc index 8c59febeb77..ac67291fbd1 100644 --- a/sql/sql_partition_admin.cc +++ b/sql/sql_partition_admin.cc @@ -763,6 +763,19 @@ bool Sql_cmd_alter_table_truncate_partition::execute(THD *thd) if (check_one_table_access(thd, DROP_ACL, first_table)) DBUG_RETURN(TRUE); +#ifdef WITH_WSREP + TABLE *find_temporary_table(THD *thd, const TABLE_LIST *tl); + + if ((!thd->is_current_stmt_binlog_format_row() || + !find_temporary_table(thd, first_table)) && + wsrep_to_isolation_begin( + thd, first_table->db, first_table->table_name, NULL) + ) + { + WSREP_WARN("ALTER TABLE isolation failure"); + DBUG_RETURN(TRUE); + } +#endif /* WITH_WSREP */ if (open_tables(thd, &first_table, &table_counter, 0)) DBUG_RETURN(true); diff --git a/sql/sql_truncate.cc b/sql/sql_truncate.cc index 47d3e6df71f..fd2d20c1813 100644 --- a/sql/sql_truncate.cc +++ b/sql/sql_truncate.cc @@ -455,6 +455,12 @@ bool Sql_cmd_truncate_table::truncate_table(THD *thd, TABLE_LIST *table_ref) { bool hton_can_recreate; +#ifdef WITH_WSREP + if (WSREP(thd) && wsrep_to_isolation_begin(thd, + table_ref->db, + table_ref->table_name, NULL)) + DBUG_RETURN(TRUE); +#endif /* WITH_WSREP */ if (lock_table(thd, table_ref, &hton_can_recreate)) DBUG_RETURN(TRUE); @@ -531,12 +537,6 @@ bool Sql_cmd_truncate_table::execute(THD *thd) if (check_one_table_access(thd, DROP_ACL, first_table)) DBUG_RETURN(res); -#ifdef WITH_WSREP - if (WSREP(thd) && wsrep_to_isolation_begin(thd, - first_table->db, - first_table->table_name, NULL)) - DBUG_RETURN(TRUE); -#endif /* WITH_WSREP */ if (! (res= truncate_table(thd, first_table))) my_ok(thd); DBUG_RETURN(res); diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 415afa49cb2..345fc9f1dfa 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -3314,7 +3314,9 @@ static bool fix_autocommit(sys_var *self, THD *thd, enum_var_type type) { thd->variables.option_bits&= ~OPTION_AUTOCOMMIT; thd->mdl_context.release_transactional_locks(); +#ifdef WITH_WSREP WSREP_DEBUG("autocommit, MDL TRX lock released: %lu", thd->thread_id); +#endif /* WITH_WSREP */ return true; } /* @@ -4646,6 +4648,10 @@ static Sys_var_mybool Sys_wsrep_load_data_splitting( "transaction after every 10K rows inserted", GLOBAL_VAR(wsrep_load_data_splitting), CMD_LINE(OPT_ARG), DEFAULT(TRUE)); + +static Sys_var_mybool Sys_wsrep_restart_slave( + "wsrep_restart_slave", "Should MySQL slave be restarted automatically, when node joins back to cluster", + GLOBAL_VAR(wsrep_restart_slave), CMD_LINE(OPT_ARG), DEFAULT(FALSE)); #endif /* WITH_WSREP */ static bool fix_host_cache_size(sys_var *, THD *, enum_var_type) diff --git a/sql/wsrep_binlog.cc b/sql/wsrep_binlog.cc index a913da8df17..62c62cb56f0 100644 --- a/sql/wsrep_binlog.cc +++ b/sql/wsrep_binlog.cc @@ -55,8 +55,8 @@ int wsrep_write_cache_buf(IO_CACHE *cache, uchar **buf, size_t *buf_len) wsrep_max_ws_size, total_length); goto error; } - uchar* tmp = (uchar *)my_realloc(*buf, total_length, - MYF(MY_ALLOW_ZERO_PTR)); + + uchar* tmp = (uchar *)my_realloc(*buf, total_length, MYF(0)); if (!tmp) { WSREP_ERROR("could not (re)allocate buffer: %zu + %u", @@ -72,7 +72,7 @@ int wsrep_write_cache_buf(IO_CACHE *cache, uchar **buf, size_t *buf_len) if (reinit_io_cache(cache, WRITE_CACHE, saved_pos, 0, 0)) { - WSREP_ERROR("failed to initialize io-cache"); + WSREP_WARN("failed to initialize io-cache"); goto cleanup; } @@ -166,19 +166,19 @@ static int wsrep_write_cache_once(wsrep_t* const wsrep, { WSREP_WARN("transaction size limit (%lu) exceeded: %zu", wsrep_max_ws_size, total_length); + err = WSREP_TRX_SIZE_EXCEEDED; goto cleanup; } if (total_length > allocated) { size_t const new_size(heap_size(total_length)); - uchar* tmp = (uchar *)my_realloc(heap_buf, new_size, - MYF(MY_ALLOW_ZERO_PTR)); + uchar* tmp = (uchar *)my_realloc(heap_buf, new_size, MYF(0)); if (!tmp) { WSREP_ERROR("could not (re)allocate buffer: %zu + %u", allocated, length); - err = WSREP_SIZE_EXCEEDED; + err = WSREP_TRX_SIZE_EXCEEDED; goto cleanup; } @@ -233,7 +233,7 @@ static int wsrep_write_cache_inc(wsrep_t* const wsrep, if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0)) { WSREP_ERROR("failed to initialize io-cache"); - return WSREP_TRX_ROLLBACK; + return WSREP_TRX_ERROR; } int err(WSREP_OK); @@ -254,7 +254,7 @@ static int wsrep_write_cache_inc(wsrep_t* const wsrep, { WSREP_WARN("transaction size limit (%lu) exceeded: %zu", wsrep_max_ws_size, total_length); - err = WSREP_SIZE_EXCEEDED; + err = WSREP_TRX_SIZE_EXCEEDED; goto cleanup; } @@ -349,3 +349,59 @@ int wsrep_binlog_savepoint_rollback(THD *thd, void *sv) int rcode = binlog_hton->savepoint_rollback(binlog_hton, thd, sv); return rcode; } + +void wsrep_dump_rbr_direct(THD* thd, IO_CACHE* cache) +{ + char filename[PATH_MAX]= {0}; + int len= snprintf(filename, PATH_MAX, "%s/GRA_%ld_%lld.log", + wsrep_data_home_dir, thd->thread_id, + (long long)wsrep_thd_trx_seqno(thd)); + size_t bytes_in_cache = 0; + // check path + if (len >= PATH_MAX) + { + WSREP_ERROR("RBR dump path too long: %d, skipping dump.", len); + return ; + } + // init cache + my_off_t const saved_pos(my_b_tell(cache)); + if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0)) + { + WSREP_ERROR("failed to initialize io-cache"); + return ; + } + // open file + FILE* of = fopen(filename, "wb"); + if (!of) + { + WSREP_ERROR("Failed to open file '%s': %d (%s)", + filename, errno, strerror(errno)); + goto cleanup; + } + // ready to write + bytes_in_cache= my_b_bytes_in_cache(cache); + if (unlikely(bytes_in_cache == 0)) bytes_in_cache = my_b_fill(cache); + if (likely(bytes_in_cache > 0)) do + { + if (my_fwrite(of, cache->read_pos, bytes_in_cache, + MYF(MY_WME | MY_NABP)) == (size_t) -1) + { + WSREP_ERROR("Failed to write file '%s'", filename); + goto cleanup; + } + cache->read_pos= cache->read_end; + } while ((cache->file >= 0) && (bytes_in_cache= my_b_fill(cache))); + if(cache->error == -1) + { + WSREP_ERROR("RBR inconsistent"); + goto cleanup; + } +cleanup: + // init back + if (reinit_io_cache(cache, WRITE_CACHE, saved_pos, 0, 0)) + { + WSREP_ERROR("failed to reinitialize io-cache"); + } + // close file + if (of) fclose(of); +} diff --git a/sql/wsrep_binlog.h b/sql/wsrep_binlog.h index 408cc9b425c..f2ff713cf67 100644 --- a/sql/wsrep_binlog.h +++ b/sql/wsrep_binlog.h @@ -50,4 +50,7 @@ int wsrep_binlog_close_connection(THD* thd); int wsrep_binlog_savepoint_set(THD *thd, void *sv); int wsrep_binlog_savepoint_rollback(THD *thd, void *sv); +/* Dump replication buffer to disk without intermediate buffer */ +void wsrep_dump_rbr_direct(THD* thd, IO_CACHE* cache); + #endif /* WSREP_BINLOG_H */ diff --git a/sql/wsrep_check_opts.cc b/sql/wsrep_check_opts.cc index ce8a46c6bd5..5ec18c79978 100644 --- a/sql/wsrep_check_opts.cc +++ b/sql/wsrep_check_opts.cc @@ -303,19 +303,6 @@ check_opts (int const argc, const char* const argv[], struct opt opts[]) } } - long long query_cache_size, query_cache_type; - if ((err = get_long_long (opts[QUERY_CACHE_SIZE], &query_cache_size, 10))) - return err; - if ((err = get_long_long (opts[QUERY_CACHE_TYPE], &query_cache_type, 10))) - return err; - - if (0 != query_cache_size && 0 != query_cache_type) - { - WSREP_ERROR ("Query cache is not supported (size=%lld type=%lld)", - query_cache_size, query_cache_type); - rcode = EINVAL; - } - bool locked_in_memory; err = get_bool (opts[LOCKED_IN_MEMORY], &locked_in_memory); if (err) { WSREP_ERROR("get_bool error: %s", strerror(err)); return err; } diff --git a/sql/wsrep_hton.cc b/sql/wsrep_hton.cc index efb6ed87179..44f345ef3aa 100644 --- a/sql/wsrep_hton.cc +++ b/sql/wsrep_hton.cc @@ -134,7 +134,7 @@ wsrep_close_connection(handlerton* hton, THD* thd) - certification test or an equivalent. As a result, the current transaction just rolls back Error codes: - WSREP_TRX_ROLLBACK, WSREP_TRX_ERROR + WSREP_TRX_CERT_FAIL, WSREP_TRX_SIZE_EXCEEDED, WSREP_TRX_ERROR - a post-certification failure makes this server unable to commit its own WS and therefore the server must abort */ @@ -155,14 +155,7 @@ static int wsrep_prepare(handlerton *hton, THD *thd, bool all) !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) && (thd->variables.wsrep_on && !wsrep_trans_cache_is_empty(thd))) { - switch (wsrep_run_wsrep_commit(thd, hton, all)) - { - case WSREP_TRX_OK: - break; - case WSREP_TRX_ROLLBACK: - case WSREP_TRX_ERROR: - DBUG_RETURN(1); - } + DBUG_RETURN (wsrep_run_wsrep_commit(thd, hton, all)); } DBUG_RETURN(0); } @@ -330,7 +323,7 @@ wsrep_run_wsrep_commit(THD *thd, handlerton *hton, bool all) WSREP_INFO("innobase_commit, abort %s", (thd->query()) ? thd->query() : "void"); } - DBUG_RETURN(WSREP_TRX_ROLLBACK); + DBUG_RETURN(WSREP_TRX_CERT_FAIL); } mysql_mutex_lock(&LOCK_wsrep_replaying); @@ -381,7 +374,7 @@ wsrep_run_wsrep_commit(THD *thd, handlerton *hton, bool all) mysql_mutex_unlock(&thd->LOCK_wsrep_thd); WSREP_DEBUG("innobase_commit abort after replaying wait %s", (thd->query()) ? thd->query() : "void"); - DBUG_RETURN(WSREP_TRX_ROLLBACK); + DBUG_RETURN(WSREP_TRX_CERT_FAIL); } thd->wsrep_query_state = QUERY_COMMITTING; @@ -394,7 +387,7 @@ wsrep_run_wsrep_commit(THD *thd, handlerton *hton, bool all) rcode = wsrep_write_cache(wsrep, thd, cache, &data_len); if (WSREP_OK != rcode) { WSREP_ERROR("rbr write fail, data_len: %zu, %d", data_len, rcode); - DBUG_RETURN(WSREP_TRX_ROLLBACK); + DBUG_RETURN(WSREP_TRX_SIZE_EXCEEDED); } } @@ -517,7 +510,7 @@ wsrep_run_wsrep_commit(THD *thd, handlerton *hton, bool all) } mysql_mutex_unlock(&thd->LOCK_wsrep_thd); - DBUG_RETURN(WSREP_TRX_ROLLBACK); + DBUG_RETURN(WSREP_TRX_CERT_FAIL); case WSREP_CONN_FAIL: WSREP_ERROR("connection failure"); diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc index 8d59db84b7f..35629b1fbf6 100644 --- a/sql/wsrep_mysqld.cc +++ b/sql/wsrep_mysqld.cc @@ -26,6 +26,7 @@ #include <cstdio> #include <cstdlib> #include "log_event.h" +#include <slave.h> wsrep_t *wsrep = NULL; my_bool wsrep_emulate_bin_log = FALSE; // activating parts of binlog interface @@ -63,7 +64,10 @@ ulong wsrep_mysql_replication_bundle = 0; my_bool wsrep_desync = 0; // desynchronize the node from the // cluster my_bool wsrep_load_data_splitting = 1; // commit load data every 10K intervals - +my_bool wsrep_restart_slave = 0; // should mysql slave thread be + // restarted, if node joins back +my_bool wsrep_restart_slave_activated = 0; // node has dropped, and slave + // restart will be needed /* * End configuration options */ @@ -126,7 +130,7 @@ static void wsrep_log_cb(wsrep_log_level_t level, const char *msg) { sql_print_error("WSREP: %s", msg); break; case WSREP_LOG_DEBUG: - if (wsrep_debug) sql_print_information ("[Debug] WSREP: %s", msg); + sql_print_information ("[Debug] WSREP: %s", msg); default: break; } @@ -407,14 +411,32 @@ void wsrep_ready_wait () static void wsrep_synced_cb(void* app_ctx) { WSREP_INFO("Synchronized with group, ready for connections"); + bool signal_main= false; if (mysql_mutex_lock (&LOCK_wsrep_ready)) abort(); if (!wsrep_ready) { wsrep_ready= TRUE; mysql_cond_signal (&COND_wsrep_ready); + signal_main= true; + } local_status.set(WSREP_MEMBER_SYNCED); mysql_mutex_unlock (&LOCK_wsrep_ready); + + if (signal_main) + { + wsrep_SE_init_grab(); + // Signal mysqld init thread to continue + wsrep_sst_complete (&local_uuid, local_seqno, false); + // and wait for SE initialization + wsrep_SE_init_wait(); + } + if (wsrep_restart_slave_activated) + { + WSREP_INFO("MySQL slave restart"); + wsrep_restart_slave_activated= FALSE; + init_slave(); + } } static void wsrep_init_position() @@ -495,7 +517,18 @@ int wsrep_init() wsrep_ready_set(TRUE); wsrep_inited= 1; global_system_variables.wsrep_on = 0; - return 0; + wsrep_init_args args; + args.options = (wsrep_provider_options) ? + wsrep_provider_options : ""; + rcode = wsrep->init(wsrep, &args); + if (rcode) + { + DBUG_PRINT("wsrep",("wsrep::init() failed: %d", rcode)); + WSREP_ERROR("wsrep::init() failed: %d, must shutdown", rcode); + free(wsrep); + wsrep = NULL; + } + return rcode; } else { @@ -651,7 +684,7 @@ void wsrep_init_startup (bool first) { if (wsrep_init()) unireg_abort(1); - wsrep_thr_lock_init(wsrep_thd_is_brute_force, wsrep_abort_thd, + wsrep_thr_lock_init(wsrep_thd_is_BF, wsrep_abort_thd, wsrep_debug, wsrep_convert_LOCK_to_trx, wsrep_on); /* Skip replication start if no cluster address */ @@ -1358,6 +1391,13 @@ int wsrep_to_isolation_begin(THD *thd, char *db_, char *table_, DBUG_ASSERT(thd->wsrep_exec_mode == LOCAL_STATE); DBUG_ASSERT(thd->wsrep_trx_meta.gtid.seqno == WSREP_SEQNO_UNDEFINED); + if (thd->global_read_lock.can_acquire_protection()) + { + WSREP_DEBUG("Aborting TOI: Global Read-Lock (FTWRL) in place: %s %lu", + thd->query(), thd->thread_id); + return -1; + } + if (wsrep_debug && thd->mdl_context.has_locks()) { WSREP_DEBUG("thread holds MDL locks at TI begin: %s %lu", diff --git a/sql/wsrep_mysqld.h b/sql/wsrep_mysqld.h index 7cf4871d6f4..30264bf24c1 100644 --- a/sql/wsrep_mysqld.h +++ b/sql/wsrep_mysqld.h @@ -96,6 +96,8 @@ extern my_bool wsrep_replicate_myisam; extern my_bool wsrep_log_conflicts; extern ulong wsrep_mysql_replication_bundle; extern my_bool wsrep_load_data_splitting; +extern my_bool wsrep_restart_slave; +extern my_bool wsrep_restart_slave_activated; enum enum_wsrep_OSU_method { WSREP_OSU_TOI, WSREP_OSU_RSU }; @@ -158,6 +160,7 @@ extern "C" char * wsrep_thd_query(THD *thd); extern "C" query_id_t wsrep_thd_wsrep_last_query_id(THD *thd); extern "C" void wsrep_thd_set_wsrep_last_query_id(THD *thd, query_id_t id); extern "C" void wsrep_thd_awake(THD *thd, my_bool signal); +extern "C" int wsrep_thd_retry_counter(THD *thd); extern void wsrep_close_client_connections(my_bool wait_to_end); @@ -232,8 +235,9 @@ extern void wsrep_ready_wait(); enum wsrep_trx_status { WSREP_TRX_OK, - WSREP_TRX_ROLLBACK, - WSREP_TRX_ERROR, + WSREP_TRX_CERT_FAIL, /* certification failure, must abort */ + WSREP_TRX_SIZE_EXCEEDED, /* trx size exceeded */ + WSREP_TRX_ERROR, /* native mysql error */ }; extern enum wsrep_trx_status @@ -274,6 +278,7 @@ extern rpl_sidno wsrep_sidno; #endif /* GTID_SUPPORT */ extern my_bool wsrep_preordered_opt; +#ifdef HAVE_PSI_INTERFACE extern PSI_mutex_key key_LOCK_wsrep_ready; extern PSI_mutex_key key_COND_wsrep_ready; extern PSI_mutex_key key_LOCK_wsrep_sst; @@ -288,7 +293,7 @@ extern PSI_mutex_key key_LOCK_wsrep_replaying; extern PSI_cond_key key_COND_wsrep_replaying; extern PSI_mutex_key key_LOCK_wsrep_slave_threads; extern PSI_mutex_key key_LOCK_wsrep_desync; - +#endif /* HAVE_PSI_INTERFACE */ struct TABLE_LIST; int wsrep_to_isolation_begin(THD *thd, char *db_, char *table_, const TABLE_LIST* table_list); diff --git a/sql/wsrep_sst.cc b/sql/wsrep_sst.cc index 050bb2073a8..0d767106fe9 100644 --- a/sql/wsrep_sst.cc +++ b/sql/wsrep_sst.cc @@ -51,9 +51,11 @@ extern const char wsrep_defaults_file[]; #define WSREP_SST_OPT_GTID "--gtid" #define WSREP_SST_OPT_BYPASS "--bypass" -#define WSREP_SST_MYSQLDUMP "mysqldump" -#define WSREP_SST_RSYNC "rsync" -#define WSREP_SST_SKIP "skip" +#define WSREP_SST_MYSQLDUMP "mysqldump" +#define WSREP_SST_RSYNC "rsync" +#define WSREP_SST_SKIP "skip" +#define WSREP_SST_XTRABACKUP "xtrabackup" +#define WSREP_SST_XTRABACKUP_V2 "xtrabackup-v2" #define WSREP_SST_DEFAULT WSREP_SST_RSYNC #define WSREP_SST_ADDRESS_AUTO "AUTO" #define WSREP_SST_AUTH_MASK "********" @@ -231,7 +233,13 @@ void wsrep_sst_complete (const wsrep_uuid_t* sst_uuid, } else { - WSREP_WARN("Nobody is waiting for SST."); + /* This can happen when called from wsrep_synced_cb(). + At the moment there is no way to check there + if main thread is still waiting for signal, + so wsrep_sst_complete() is called from there + each time wsrep_ready changes from FALSE -> TRUE. + */ + WSREP_DEBUG("Nobody is waiting for SST."); } mysql_mutex_unlock (&LOCK_wsrep_sst); } @@ -315,6 +323,33 @@ static char* my_fgets (char* buf, size_t buf_len, FILE* stream) return ret; } +/* + Generate opt_binlog_opt_val for sst_donate_other(), sst_prepare_other(). + + Returns zero on success, negative error code otherwise. + + String containing binlog name is stored in param ret if binlog is enabled + and GTID mode is on, otherwise empty string. Returned string should be + freed with my_free(). + */ +static int generate_binlog_opt_val(char** ret) +{ + DBUG_ASSERT(ret); + *ret= NULL; + if (opt_bin_log) + { + assert(opt_bin_logname); + *ret= strcmp(opt_bin_logname, "0") ? + my_strdup(opt_bin_logname, MYF(0)) : my_strdup("", MYF(0)); + } + else + { + *ret= my_strdup("", MYF(0)); + } + if (!*ret) return -ENOMEM; + return 0; +} + static void* sst_joiner_thread (void* a) { sst_thread_arg* arg= (sst_thread_arg*) a; @@ -409,21 +444,32 @@ static ssize_t sst_prepare_other (const char* method, ssize_t cmd_len= 1024; char cmd_str[cmd_len]; const char* sst_dir= mysql_real_data_home; - const char* binlog_opt= (opt_bin_logname ? (strcmp(opt_bin_logname, "0") ? WSREP_SST_OPT_BINLOG : "") : ""); - const char* binlog_opt_val= (opt_bin_logname ? (strcmp(opt_bin_logname, "0") ? opt_bin_logname : "") : ""); - - int ret= snprintf (cmd_str, cmd_len, - "wsrep_sst_%s " - WSREP_SST_OPT_ROLE" 'joiner' " - WSREP_SST_OPT_ADDR" '%s' " - WSREP_SST_OPT_AUTH" '%s' " - WSREP_SST_OPT_DATA" '%s' " - WSREP_SST_OPT_CONF" '%s' " - WSREP_SST_OPT_PARENT" '%d'" - " %s '%s' ", - method, addr_in, (sst_auth_real) ? sst_auth_real : "", - sst_dir, wsrep_defaults_file, (int)getpid(), - binlog_opt, binlog_opt_val); + const char* binlog_opt= ""; + char* binlog_opt_val= NULL; + + int ret; + if ((ret= generate_binlog_opt_val(&binlog_opt_val))) + { + WSREP_ERROR("sst_prepare_other(): generate_binlog_opt_val() failed: %d", + ret); + return ret; + } + if (strlen(binlog_opt_val)) binlog_opt= WSREP_SST_OPT_BINLOG; + + + ret= snprintf (cmd_str, cmd_len, + "wsrep_sst_%s " + WSREP_SST_OPT_ROLE" 'joiner' " + WSREP_SST_OPT_ADDR" '%s' " + WSREP_SST_OPT_AUTH" '%s' " + WSREP_SST_OPT_DATA" '%s' " + WSREP_SST_OPT_CONF" '%s' " + WSREP_SST_OPT_PARENT" '%d'" + " %s '%s' ", + method, addr_in, (sst_auth_real) ? sst_auth_real : "", + sst_dir, wsrep_defaults_file, (int)getpid(), + binlog_opt, binlog_opt_val); + my_free(binlog_opt_val); if (ret < 0 || ret >= cmd_len) { @@ -933,8 +979,9 @@ wait_signal: else { WSREP_ERROR("Failed to read from: %s", proc.cmd()); + proc.wait(); } - if (err && proc.error()) err= proc.error(); + if (!err && proc.error()) err= proc.error(); } else { @@ -958,6 +1005,8 @@ wait_signal: return NULL; } + + static int sst_donate_other (const char* method, const char* addr, const char* uuid, @@ -966,25 +1015,34 @@ static int sst_donate_other (const char* method, { ssize_t cmd_len = 4096; char cmd_str[cmd_len]; - const char* binlog_opt= (opt_bin_logname ? (strcmp(opt_bin_logname, "0") ? WSREP_SST_OPT_BINLOG : "") : ""); - const char* binlog_opt_val= (opt_bin_logname ? (strcmp(opt_bin_logname, "0") ? opt_bin_logname : "") : ""); - - int ret= snprintf (cmd_str, cmd_len, - "wsrep_sst_%s " - WSREP_SST_OPT_ROLE" 'donor' " - WSREP_SST_OPT_ADDR" '%s' " - WSREP_SST_OPT_AUTH" '%s' " - WSREP_SST_OPT_SOCKET" '%s' " - WSREP_SST_OPT_DATA" '%s' " - WSREP_SST_OPT_CONF" '%s' " - " %s '%s' " - WSREP_SST_OPT_GTID" '%s:%lld'" - "%s", - method, addr, sst_auth_real, mysqld_unix_port, - mysql_real_data_home, wsrep_defaults_file, - binlog_opt, binlog_opt_val, - uuid, (long long) seqno, - bypass ? " "WSREP_SST_OPT_BYPASS : ""); + const char* binlog_opt= ""; + char* binlog_opt_val= NULL; + + int ret; + if ((ret= generate_binlog_opt_val(&binlog_opt_val))) + { + WSREP_ERROR("sst_donate_other(): generate_binlog_opt_val() failed: %d",ret); + return ret; + } + if (strlen(binlog_opt_val)) binlog_opt= WSREP_SST_OPT_BINLOG; + + ret= snprintf (cmd_str, cmd_len, + "wsrep_sst_%s " + WSREP_SST_OPT_ROLE" 'donor' " + WSREP_SST_OPT_ADDR" '%s' " + WSREP_SST_OPT_AUTH" '%s' " + WSREP_SST_OPT_SOCKET" '%s' " + WSREP_SST_OPT_DATA" '%s' " + WSREP_SST_OPT_CONF" '%s' " + " %s '%s' " + WSREP_SST_OPT_GTID" '%s:%lld'" + "%s", + method, addr, sst_auth_real, mysqld_unix_port, + mysql_real_data_home, wsrep_defaults_file, + binlog_opt, binlog_opt_val, + uuid, (long long) seqno, + bypass ? " "WSREP_SST_OPT_BYPASS : ""); + my_free(binlog_opt_val); if (ret < 0 || ret >= cmd_len) { @@ -1049,7 +1107,10 @@ void wsrep_SE_init_grab() void wsrep_SE_init_wait() { - mysql_cond_wait (&COND_wsrep_sst_init, &LOCK_wsrep_sst_init); + while (SE_initialized == false) + { + mysql_cond_wait (&COND_wsrep_sst_init, &LOCK_wsrep_sst_init); + } mysql_mutex_unlock (&LOCK_wsrep_sst_init); } diff --git a/sql/wsrep_thd.cc b/sql/wsrep_thd.cc index 2e1e86f2f30..78ba559380b 100644 --- a/sql/wsrep_thd.cc +++ b/sql/wsrep_thd.cc @@ -472,31 +472,53 @@ void wsrep_create_rollbacker() } } +my_bool wsrep_thd_is_BF(void *thd_ptr, my_bool sync) +{ + my_bool status = FALSE; + if (thd_ptr) + { + THD* thd = (THD*)thd_ptr; + if (sync) mysql_mutex_lock(&thd->LOCK_wsrep_thd); + + status = ((thd->wsrep_exec_mode == REPL_RECV) || + (thd->wsrep_exec_mode == TOTAL_ORDER)); + if (sync) mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } + return status; +} + extern "C" -int wsrep_thd_is_brute_force(void *thd_ptr) +my_bool wsrep_thd_is_BF_or_commit(void *thd_ptr, my_bool sync) { - /* - Brute force: - Appliers and replaying are running in REPL_RECV mode. TOI statements - in TOTAL_ORDER mode. Locally committing transaction that has got - past wsrep->pre_commit() without error is running in LOCAL_COMMIT mode. - - Everything else is running in LOCAL_STATE and should not be considered - brute force. - */ - if (thd_ptr) { - switch (((THD *)thd_ptr)->wsrep_exec_mode) { - case LOCAL_STATE: return 0; - case REPL_RECV: return 1; - case TOTAL_ORDER: return 2; - case LOCAL_COMMIT: return 3; - } - DBUG_ASSERT(0); + bool status = FALSE; + if (thd_ptr) + { + THD* thd = (THD*)thd_ptr; + if (sync) mysql_mutex_lock(&thd->LOCK_wsrep_thd); + + status = ((thd->wsrep_exec_mode == REPL_RECV) || + (thd->wsrep_exec_mode == TOTAL_ORDER) || + (thd->wsrep_exec_mode == LOCAL_COMMIT)); + if (sync) mysql_mutex_unlock(&thd->LOCK_wsrep_thd); } - return 0; + return status; } extern "C" +my_bool wsrep_thd_is_local(void *thd_ptr, my_bool sync) +{ + bool status = FALSE; + if (thd_ptr) + { + THD* thd = (THD*)thd_ptr; + if (sync) mysql_mutex_lock(&thd->LOCK_wsrep_thd); + + status = (thd->wsrep_exec_mode == LOCAL_STATE); + if (sync) mysql_mutex_unlock(&thd->LOCK_wsrep_thd); + } + return status; +} + int wsrep_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, my_bool signal) { THD *victim_thd = (THD *) victim_thd_ptr; diff --git a/sql/wsrep_thd.h b/sql/wsrep_thd.h index bded13b5684..f719deae2b7 100644 --- a/sql/wsrep_thd.h +++ b/sql/wsrep_thd.h @@ -24,9 +24,13 @@ void wsrep_replay_transaction(THD *thd); void wsrep_create_appliers(long threads); void wsrep_create_rollbacker(); -extern "C" int wsrep_thd_is_brute_force(void *thd_ptr); -extern "C" int wsrep_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, +int wsrep_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, my_bool signal); + +extern my_bool wsrep_thd_is_BF(void *thd_ptr, my_bool sync); +//extern "C" my_bool wsrep_thd_is_BF(void *thd_ptr, my_bool sync); +extern "C" my_bool wsrep_thd_is_BF_or_commit(void *thd_ptr, my_bool sync); +extern "C" my_bool wsrep_thd_is_local(void *thd_ptr, my_bool sync); extern "C" int wsrep_thd_in_locking_session(void *thd_ptr); #endif /* WSREP_THD_H */ diff --git a/sql/wsrep_utils.cc b/sql/wsrep_utils.cc index fae9d97eedd..cdee1c2cece 100644 --- a/sql/wsrep_utils.cc +++ b/sql/wsrep_utils.cc @@ -321,6 +321,10 @@ thd::~thd () /* Returns INADDR_NONE, INADDR_ANY, INADDR_LOOPBACK or something else */ unsigned int wsrep_check_ip (const char* const addr) { +#if 0 + if (addr && 0 == strcasecmp(addr, MY_BIND_ALL_ADDRESSES)) return INADDR_ANY; +#endif + unsigned int ret = INADDR_NONE; struct addrinfo *res, hints; @@ -362,7 +366,6 @@ unsigned int wsrep_check_ip (const char* const addr) } extern char* my_bind_addr_str; -extern uint mysqld_port; size_t wsrep_guess_ip (char* buf, size_t buf_len) { diff --git a/sql/wsrep_var.cc b/sql/wsrep_var.cc index 02e2584e86d..d187c7b9e35 100644 --- a/sql/wsrep_var.cc +++ b/sql/wsrep_var.cc @@ -331,7 +331,16 @@ bool wsrep_cluster_address_update (sys_var *self, THD* thd, enum_var_type type) bool wsrep_on_saved= thd->variables.wsrep_on; thd->variables.wsrep_on= false; + /* stop replication is heavy operation, and includes closing all client + connections. Closing clients may need to get LOCK_global_system_variables + at least in MariaDB. + + Note: releasing LOCK_global_system_variables may cause race condition, if + there can be several concurrent clients changing wsrep_provider + */ + mysql_mutex_unlock(&LOCK_global_system_variables); wsrep_stop_replication(thd); + mysql_mutex_lock(&LOCK_global_system_variables); if (wsrep_start_replication()) { diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 6cb664dcb05..82da55d069a 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -1207,8 +1207,8 @@ innobase_srv_conc_enter_innodb( trx_t* trx) /*!< in: transaction handle */ { #ifdef WITH_WSREP - if (wsrep_on(trx->mysql_thd) && - wsrep_thd_is_brute_force(trx->mysql_thd)) return; + if (wsrep_on(trx->mysql_thd) && + wsrep_thd_is_BF(trx->mysql_thd, FALSE)) return; #endif /* WITH_WSREP */ if (srv_thread_concurrency) { if (trx->n_tickets_to_enter_innodb > 0) { @@ -1245,8 +1245,8 @@ innobase_srv_conc_exit_innodb( ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch)); #endif /* UNIV_SYNC_DEBUG */ #ifdef WITH_WSREP - if (wsrep_on(trx->mysql_thd) && - wsrep_thd_is_brute_force(trx->mysql_thd)) return; + if (wsrep_on(trx->mysql_thd) && + wsrep_thd_is_BF(trx->mysql_thd, FALSE)) return; #endif /* WITH_WSREP */ /* This is to avoid making an unnecessary function call. */ @@ -3534,11 +3534,6 @@ innobase_commit_low( /*================*/ trx_t* trx) /*!< in: transaction handle */ { - if (trx_is_started(trx)) { - - trx_commit_for_mysql(trx); - } - #ifdef WITH_WSREP THD* thd = (THD*)trx->mysql_thd; const char* tmp = 0; @@ -3556,7 +3551,10 @@ innobase_commit_low( #endif /* WSREP_PROC_INFO */ } #endif /* WITH_WSREP */ + if (trx_is_started(trx)) { + trx_commit_for_mysql(trx); + } #ifdef WITH_WSREP if (wsrep_on((void*)thd)) { thd_proc_info(thd, tmp); } #endif /* WITH_WSREP */ @@ -5608,7 +5606,7 @@ wsrep_innobase_mysql_sort( tmp_length = charset->coll->strnxfrm(charset, str, str_length, str_length, tmp_str, tmp_length, 0); /**/ - DBUG_ASSERT(tmp_length == str_length); + DBUG_ASSERT(tmp_length <= str_length); break; } @@ -7294,7 +7292,9 @@ ha_innobase::write_row( || sql_command == SQLCOM_CREATE_INDEX #ifdef WITH_WSREP || (wsrep_on(user_thd) && wsrep_load_data_splitting && - sql_command == SQLCOM_LOAD) + sql_command == SQLCOM_LOAD && + !thd_test_options( + user_thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) #endif /* WITH_WSREP */ || sql_command == SQLCOM_DROP_INDEX) && num_write_row >= 10000) { @@ -7304,7 +7304,6 @@ ha_innobase::write_row( wsrep_thd_query(user_thd)); } #endif /* WITH_WSREP */ - /* ALTER TABLE is COMMITted at every 10000 copied rows. The IX table lock for the original table has to be re-issued. As this method will be called on a temporary table where the @@ -7343,7 +7342,8 @@ no_commit: { case WSREP_TRX_OK: break; - case WSREP_TRX_ROLLBACK: + case WSREP_TRX_SIZE_EXCEEDED: + case WSREP_TRX_CERT_FAIL: case WSREP_TRX_ERROR: DBUG_RETURN(1); } @@ -7367,7 +7367,8 @@ no_commit: { case WSREP_TRX_OK: break; - case WSREP_TRX_ROLLBACK: + case WSREP_TRX_SIZE_EXCEEDED: + case WSREP_TRX_CERT_FAIL: case WSREP_TRX_ERROR: DBUG_RETURN(1); } @@ -7490,9 +7491,10 @@ no_commit: #ifdef WITH_WSREP /* workaround for LP bug #355000, retrying the insert */ case SQLCOM_INSERT: - if (wsrep_on(current_thd) && - auto_inc_inserted && - wsrep_drupal_282555_workaround && + if (wsrep_on(current_thd) && + auto_inc_inserted && + wsrep_drupal_282555_workaround && + wsrep_thd_retry_counter(current_thd) == 0 && !thd_test_options(current_thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { @@ -9620,6 +9622,13 @@ ha_innobase::wsrep_append_keys( } else { ut_a(table->s->keys <= 256); uint i; + bool hasPK= false; + + for (i=0; i<table->s->keys && !hasPK; ++i) { + KEY* key_info = table->key_info + i; + if (key_info->flags & HA_NOSAME) hasPK = true; + } + for (i=0; i<table->s->keys; ++i) { uint len; char keyval0[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'}; @@ -9640,14 +9649,11 @@ ha_innobase::wsrep_append_keys( table->s->table_name.str, key_info->name); } - if (key_info->flags & HA_NOSAME || + if (!hasPK || key_info->flags & HA_NOSAME || ((tab && dict_table_get_referenced_constraint(tab, idx)) || (!tab && referenced_by_foreign_key()))) { - if (key_info->flags & HA_NOSAME || shared) - key_appended = true; - len = wsrep_store_key_val_for_row( table, i, key0, key_info->key_length, record0, &is_null); @@ -9656,6 +9662,10 @@ ha_innobase::wsrep_append_keys( thd, trx, table_share, table, keyval0, len+1, shared); if (rcode) DBUG_RETURN(rcode); + + if (key_info->flags & HA_NOSAME || shared) + key_appended = true; + } else { @@ -16853,6 +16863,7 @@ wsrep_abort_slave_trx(wsrep_seqno_t bf_seqno, wsrep_seqno_t victim_seqno) } /*******************************************************************//** This function is used to kill one transaction in BF. */ + int wsrep_innobase_kill_one_trx(void * const bf_thd_ptr, const trx_t * const bf_trx, diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h index 53c196b6f9e..e2c27acde31 100644 --- a/storage/innobase/include/ha_prototypes.h +++ b/storage/innobase/include/ha_prototypes.h @@ -291,7 +291,7 @@ UNIV_INTERN int wsrep_innobase_kill_one_trx(void *thd_ptr, const trx_t *bf_trx, trx_t *victim_trx, ibool signal); -extern "C" int wsrep_thd_is_brute_force(void *thd_ptr); +my_bool wsrep_thd_is_BF(void *thd_ptr, my_bool sync); int wsrep_trx_order_before(void *thd1, void *thd2); void wsrep_innobase_mysql_sort(int mysql_type, uint charset_number, unsigned char* str, unsigned int str_length); diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h index f34bfc3656a..6ef6685b371 100644 --- a/storage/innobase/include/lock0lock.h +++ b/storage/innobase/include/lock0lock.h @@ -894,7 +894,6 @@ lock_trx_has_rec_x_lock( record */ #define LOCK_CONV_BY_OTHER 4096 /*!< this bit is set when the lock is created by other transaction */ -#define WSREP_BF 8192 #if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION|LOCK_CONV_BY_OTHER)&LOCK_TYPE_MASK # error #endif diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 92cc5ee89a8..ae32738cb17 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -979,47 +979,6 @@ lock_rec_has_to_wait( && !lock_mode_compatible(static_cast<enum lock_mode>( LOCK_MODE_MASK & type_mode), lock_get_mode(lock2))) { -#ifdef WITH_WSREP - /* if BF thread is locking and has conflict with another BF - thread, we need to look at trx ordering and lock types */ - if (for_locking && - wsrep_thd_is_brute_force(trx->mysql_thd) && - wsrep_thd_is_brute_force(lock2->trx->mysql_thd)) { - - if (wsrep_debug) { - fprintf(stderr, "\n BF-BF lock conflict \n"); - lock_rec_print(stderr, lock2); - } - - if (wsrep_trx_order_before(trx->mysql_thd, - lock2->trx->mysql_thd) && - (type_mode & LOCK_MODE_MASK) == LOCK_X && - (lock2->type_mode & LOCK_MODE_MASK) == LOCK_X) - { - /* exclusive lock conflicts are not accepted */ - fprintf(stderr, "BF-BF X lock conflict\n"); - lock_rec_print(stderr, lock2); - - abort(); - } else { - if (wsrep_debug) { - fprintf(stderr, - "BF conflict, modes: %lu %lu\n", - type_mode, - lock2->type_mode); -#ifdef OUT - fprintf(stderr, - "seqnos %llu %llu\n", - (long long)wsrep_thd_trx_seqno( - trx->mysql_thd), - (long long)wsrep_thd_trx_seqno( - lock2->trx->mysql_thd)); -#endif - } - return FALSE; - } - } -#endif /* WITH_WSREP */ /* We have somewhat complex rules when gap type record locks cause waits */ @@ -1069,6 +1028,44 @@ lock_rec_has_to_wait( return(FALSE); } +#ifdef WITH_WSREP + /* if BF thread is locking and has conflict with another BF + thread, we need to look at trx ordering and lock types */ + if (for_locking && + wsrep_thd_is_BF(trx->mysql_thd, FALSE) && + wsrep_thd_is_BF(lock2->trx->mysql_thd, TRUE)) { + + if (wsrep_debug) { + fprintf(stderr, "\n BF-BF lock conflict \n"); + lock_rec_print(stderr, lock2); + } + + if (wsrep_trx_order_before(trx->mysql_thd, + lock2->trx->mysql_thd) && + (type_mode & LOCK_MODE_MASK) == LOCK_X && + (lock2->type_mode & LOCK_MODE_MASK) == LOCK_X) + { + /* exclusive lock conflicts are not accepted */ + fprintf(stderr, "BF-BF X lock conflict\n"); + lock_rec_print(stderr, lock2); + abort(); + } else { + /* if lock2->index->n_uniq <= + lock2->index->n_user_defined_cols + operation is on uniq index + */ + if (wsrep_debug) fprintf(stderr, + "BF conflict, modes: %lu %lu, " + "idx: %s-%s n_uniq %u n_user %u\n", + type_mode, lock2->type_mode, + lock2->index->name, + lock2->index->table_name, + lock2->index->n_uniq, + lock2->index->n_user_defined_cols); + return FALSE; + } + } +#endif /* WITH_WSREP */ return(TRUE); } @@ -1626,17 +1623,16 @@ lock_rec_other_has_expl_req( #endif /* UNIV_DEBUG */ #ifdef WITH_WSREP -static -void +static void wsrep_kill_victim(const trx_t * const trx, const lock_t *lock) { ut_ad(lock_mutex_own()); ut_ad(trx_mutex_own(lock->trx)); - my_bool bf_this = wsrep_thd_is_brute_force(trx->mysql_thd); - my_bool bf_other = wsrep_thd_is_brute_force(lock->trx->mysql_thd); + my_bool bf_this = wsrep_thd_is_BF(trx->mysql_thd, FALSE); + my_bool bf_other = wsrep_thd_is_BF(lock->trx->mysql_thd, TRUE); if ((bf_this && !bf_other) || (bf_this && bf_other && wsrep_trx_order_before( trx->mysql_thd, lock->trx->mysql_thd))) { - + if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) { if (wsrep_debug) fprintf(stderr, "WSREP: BF victim waiting\n"); @@ -1930,11 +1926,6 @@ lock_rec_create( lock->trx = trx; lock->type_mode = (type_mode & ~LOCK_TYPE_MASK) | LOCK_REC; -#ifdef WITH_WSREP - if (wsrep_thd_is_brute_force(trx->mysql_thd)) { - lock->type_mode |= WSREP_BF; - } -#endif /* WITH_WSREP */ lock->index = index; lock->un_member.rec_lock.space = space; @@ -1954,12 +1945,12 @@ lock_rec_create( ut_ad(index->table->n_ref_count > 0 || !index->table->can_be_evicted); #ifdef WITH_WSREP - if (c_lock && wsrep_thd_is_brute_force(trx->mysql_thd)) { + if (c_lock && wsrep_thd_is_BF(trx->mysql_thd, FALSE)) { lock_t *hash = (lock_t *)c_lock->hash; lock_t *prev = NULL; while (hash && - wsrep_thd_is_brute_force(((lock_t *)hash)->trx->mysql_thd) && + wsrep_thd_is_BF(((lock_t *)hash)->trx->mysql_thd, TRUE) && wsrep_trx_order_before( ((lock_t *)hash)->trx->mysql_thd, trx->mysql_thd)) { @@ -2363,11 +2354,6 @@ lock_rec_lock_fast( || (LOCK_MODE_MASK & mode) == LOCK_X); ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP || mode - (LOCK_MODE_MASK & mode) == 0 -#ifdef WITH_WSREP - || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == 0 - || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == LOCK_GAP - || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == LOCK_REC_NOT_GAP -#endif /* WITH_WSREP */ || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP); ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index)); @@ -2386,8 +2372,7 @@ lock_rec_lock_fast( #else lock = lock_rec_create( mode, block, heap_no, index, trx, FALSE); -#endif - +#endif /* WITH_WSREP */ } status = LOCK_REC_SUCCESS_CREATED; } else { @@ -2455,11 +2440,6 @@ lock_rec_lock_slow( || (LOCK_MODE_MASK & mode) == LOCK_X); ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP || mode - (LOCK_MODE_MASK & mode) == 0 -#ifdef WITH_WSREP - || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == 0 - || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == LOCK_GAP - || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == LOCK_REC_NOT_GAP -#endif /* WITH_WSREP */ || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP); ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index)); @@ -2569,19 +2549,8 @@ lock_rec_lock( || (LOCK_MODE_MASK & mode) == LOCK_X); ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP -#ifdef WITH_WSREP - || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == 0 - || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == LOCK_GAP - || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == LOCK_REC_NOT_GAP -#endif /* WITH_WSREP */ || mode - (LOCK_MODE_MASK & mode) == 0); -#ifdef WITH_WSREP - if (wsrep_thd_is_brute_force(thr_get_trx(thr)->mysql_thd)) { - mode |= WSREP_BF; - } -#endif - ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index)); /* We try a simplified and faster subroutine for the most @@ -4025,17 +3994,19 @@ lock_deadlock_select_victim( if (trx_weight_ge(ctx->wait_lock->trx, ctx->start)) { /* The joining transaction is 'smaller', choose it as the victim and roll it back. */ + #ifdef WITH_WSREP - if (!wsrep_thd_is_brute_force(ctx->start->mysql_thd)) { - return(ctx->start); - } + if (wsrep_thd_is_BF(ctx->start->mysql_thd, TRUE)) + return(ctx->wait_lock->trx); + else #endif /* WITH_WSREP */ return(ctx->start); } + #ifdef WITH_WSREP - if (wsrep_thd_is_brute_force(ctx->wait_lock->trx->mysql_thd)) + if (wsrep_thd_is_BF(ctx->wait_lock->trx->mysql_thd, TRUE)) return(ctx->start); - else + else #endif /* WITH_WSREP */ return(ctx->wait_lock->trx); } @@ -4167,7 +4138,7 @@ lock_deadlock_search( ctx->too_deep = TRUE; #ifdef WITH_WSREP - if (wsrep_thd_is_brute_force(ctx->start->mysql_thd)) + if (wsrep_thd_is_BF(ctx->start->mysql_thd, TRUE)) return(ctx->wait_lock->trx->id); else #endif /* WITH_WSREP */ @@ -4190,7 +4161,7 @@ lock_deadlock_search( ctx->too_deep = TRUE; #ifdef WITH_WSREP - if (wsrep_thd_is_brute_force(ctx->start->mysql_thd)) + if (wsrep_thd_is_BF(ctx->start->mysql_thd, TRUE)) return(lock->trx->id); else #endif /* WITH_WSREP */ @@ -4313,7 +4284,7 @@ lock_deadlock_check_and_resolve( ut_a(victim_trx_id == trx->id); #ifdef WITH_WSREP - if (!wsrep_thd_is_brute_force(ctx.start->mysql_thd)) + if (!wsrep_thd_is_BF(ctx.start->mysql_thd, TRUE)) { #endif /* WITH_WSREP */ if (!srv_read_only_mode) { @@ -4409,7 +4380,7 @@ lock_table_create( UT_LIST_ADD_LAST(trx_locks, trx->lock.trx_locks, lock); #ifdef WITH_WSREP - if (c_lock && wsrep_thd_is_brute_force(trx->mysql_thd)) { + if (c_lock && wsrep_thd_is_BF(trx->mysql_thd, FALSE)) { UT_LIST_INSERT_AFTER( un_member.tab_lock.locks, table->locks, c_lock, lock); } else { @@ -6489,7 +6460,7 @@ lock_rec_convert_impl_to_expl( if (rec_get_deleted_flag(rec, rec_offs_comp(offsets)) #ifdef WITH_WSREP - && !wsrep_thd_is_brute_force(impl_trx->mysql_thd) + && !wsrep_thd_is_BF(impl_trx->mysql_thd, FALSE) /* BF-BF conflict is possible if advancing into lock_rec_other_has_conflicting*/ #endif /* WITH_WSREP */ diff --git a/storage/innobase/lock/lock0wait.cc b/storage/innobase/lock/lock0wait.cc index a1c35e20ead..388c847f580 100644 --- a/storage/innobase/lock/lock0wait.cc +++ b/storage/innobase/lock/lock0wait.cc @@ -184,6 +184,28 @@ lock_wait_table_reserve_slot( return(NULL); } +#ifdef WITH_WSREP +/*********************************************************************//** +check if lock timeout was for priority thread, +as a side effect trigger lock monitor +@return false for regular lock timeout */ +static ibool +wsrep_is_BF_lock_timeout( +/*====================*/ + trx_t* trx) /* in: trx to check for lock priority */ +{ + if (wsrep_on(trx->mysql_thd) && + wsrep_thd_is_BF(trx->mysql_thd, FALSE)) { + fprintf(stderr, "WSREP: BF lock wait long\n"); + srv_print_innodb_monitor = TRUE; + srv_print_innodb_lock_monitor = TRUE; + os_event_set(srv_monitor_event); + return TRUE; + } + return FALSE; + } +#endif /* WITH_WSREP */ + /***************************************************************//** Puts a user OS thread to wait for a lock to be released. If an error occurs during the wait trx->error_state associated with thr is @@ -375,9 +397,15 @@ lock_wait_suspend_thread( if (lock_wait_timeout < 100000000 && wait_time > (double) lock_wait_timeout) { +#ifdef WITH_WSREP + if (!wsrep_is_BF_lock_timeout(trx)) { +#endif /* WITH_WSREP */ trx->error_state = DB_LOCK_WAIT_TIMEOUT; +#ifdef WITH_WSREP + } +#endif /* WITH_WSREP */ MONITOR_INC(MONITOR_TIMEOUT); } @@ -461,8 +489,13 @@ lock_wait_check_and_cancel( if (trx->lock.wait_lock) { ut_a(trx->lock.que_state == TRX_QUE_LOCK_WAIT); - +#ifdef WITH_WSREP + if (!wsrep_is_BF_lock_timeout(trx)) { +#endif /* WITH_WSREP */ lock_cancel_waiting_and_release(trx->lock.wait_lock); +#ifdef WITH_WSREP + } +#endif /* WITH_WSREP */ } lock_mutex_exit(); diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index 5d3f1dd8f24..8e60c11c8ca 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -1926,9 +1926,6 @@ row_ins_scan_sec_index_for_duplicate( mem_heap_t* offsets_heap) /*!< in/out: memory heap that can be emptied */ { -#ifdef WITH_WSREP - trx_t* trx = thr_get_trx(thr); -#endif ulint n_unique; int cmp; ulint n_fields_cmp; @@ -1997,16 +1994,8 @@ row_ins_scan_sec_index_for_duplicate( if (flags & BTR_NO_LOCKING_FLAG) { /* Set no locks when applying log in online table rebuild. */ -#ifdef WITH_WSREP - /* slave applier must not get duplicate error */ - } else if (allow_duplicates || - (wsrep_on(trx->mysql_thd) && - wsrep_thd_is_brute_force(trx->mysql_thd))) { -#else } else if (allow_duplicates) { -#endif - /* If the SQL-query will update or replace duplicate key we will take X-lock for duplicates ( REPLACE, LOAD DATAFILE REPLACE, @@ -2016,6 +2005,10 @@ row_ins_scan_sec_index_for_duplicate( lock_type, block, rec, index, offsets, thr); } else { +#ifdef WITH_WSREP + /* appliers don't need dupkey checks */ + if (!wsrep_thd_is_BF(thr_get_trx(thr)->mysql_thd, 0)) +#endif /* WITH_WSREP */ err = row_ins_set_shared_rec_lock( lock_type, block, rec, index, offsets, thr); } @@ -2211,13 +2204,7 @@ row_ins_duplicate_error_in_clust( sure that in roll-forward we get the same duplicate errors as in original execution */ -#ifdef WITH_WSREP - if (trx->duplicates || - (wsrep_on(trx->mysql_thd) && - wsrep_thd_is_brute_force(trx->mysql_thd))) { -#else if (trx->duplicates) { -#endif /* If the SQL-query will update or replace duplicate key we will take X-lock for @@ -2262,13 +2249,7 @@ duplicate: offsets = rec_get_offsets(rec, cursor->index, offsets, ULINT_UNDEFINED, &heap); -#ifdef WITH_WSREP - if (trx->duplicates || - (wsrep_on(trx->mysql_thd) && - wsrep_thd_is_brute_force(trx->mysql_thd))) { -#else if (trx->duplicates) { -#endif /* If the SQL-query will update or replace duplicate key we will take X-lock for diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc index eb5d9e5feb6..d34ffdd5f6d 100644 --- a/storage/innobase/row/row0upd.cc +++ b/storage/innobase/row/row0upd.cc @@ -373,6 +373,8 @@ wsrep_row_upd_check_foreign_constraints( trx = thr_get_trx(thr); + /* TODO: make native slave thread bail out here */ + rec = btr_pcur_get_rec(pcur); ut_ad(rec_offs_validate(rec, index, offsets)); diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index 15730b4f00a..aab893df09b 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -2409,27 +2409,6 @@ suspend_thread: OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */ } -#ifdef WITH_WSREP_TODO -/*********************************************************************//** -check if lock timeout was for priority thread, -as a side effect trigger lock monitor -@return false for regular lock timeout */ -static ibool -wsrep_is_BF_lock_timeout( -/*====================*/ - srv_slot_t* slot) /* in: lock slot to check for lock priority */ -{ - if (wsrep_on(thr_get_trx(slot->thr)->mysql_thd) && - wsrep_thd_is_brute_force((thr_get_trx(slot->thr))->mysql_thd)) { - fprintf(stderr, "WSREP: BF lock wait long\n"); - srv_print_innodb_monitor = TRUE; - srv_print_innodb_lock_monitor = TRUE; - os_event_set(lock_sys->timeout_event); - return TRUE; - } - return FALSE; - } -#endif /* WITH_WSREP_TODO */ /*********************************************************************//** Check if purge should stop. @return true if it should shutdown. */ diff --git a/storage/innobase/trx/trx0sys.cc b/storage/innobase/trx/trx0sys.cc index cc1c095f16e..4af61d4869b 100644 --- a/storage/innobase/trx/trx0sys.cc +++ b/storage/innobase/trx/trx0sys.cc @@ -178,7 +178,12 @@ trx_sys_flush_max_trx_id(void) mtr_t mtr; trx_sysf_t* sys_header; +#ifndef WITH_WSREP + /* wsrep_fake_trx_id violates this assert + * Copied from trx_sys_get_new_trx_id + */ ut_ad(mutex_own(&trx_sys->mutex)); +#endif /* WITH_WSREP */ if (!srv_read_only_mode) { mtr_start(&mtr); diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 61287e73acc..a9c12d9259c 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -1437,8 +1437,8 @@ innobase_srv_conc_enter_innodb( trx_t* trx) /*!< in: transaction handle */ { #ifdef WITH_WSREP - if (wsrep_on(trx->mysql_thd) && - wsrep_thd_is_brute_force(trx->mysql_thd)) return; + if (wsrep_on(trx->mysql_thd) && + wsrep_thd_is_BF(trx->mysql_thd, FALSE)) return; #endif /* WITH_WSREP */ if (srv_thread_concurrency) { if (trx->n_tickets_to_enter_innodb > 0) { @@ -1475,8 +1475,8 @@ innobase_srv_conc_exit_innodb( ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch)); #endif /* UNIV_SYNC_DEBUG */ #ifdef WITH_WSREP - if (wsrep_on(trx->mysql_thd) && - wsrep_thd_is_brute_force(trx->mysql_thd)) return; + if (wsrep_on(trx->mysql_thd) && + wsrep_thd_is_BF(trx->mysql_thd, FALSE)) return; #endif /* WITH_WSREP */ /* This is to avoid making an unnecessary function call. */ @@ -3967,11 +3967,6 @@ innobase_commit_low( /*================*/ trx_t* trx) /*!< in: transaction handle */ { - if (trx_is_started(trx)) { - - trx_commit_for_mysql(trx); - } - #ifdef WITH_WSREP THD* thd = (THD*)trx->mysql_thd; const char* tmp = 0; @@ -3989,7 +3984,10 @@ innobase_commit_low( #endif /* WSREP_PROC_INFO */ } #endif /* WITH_WSREP */ + if (trx_is_started(trx)) { + trx_commit_for_mysql(trx); + } #ifdef WITH_WSREP if (wsrep_on((void*)thd)) { thd_proc_info(thd, tmp); } #endif /* WITH_WSREP */ @@ -6054,7 +6052,7 @@ wsrep_innobase_mysql_sort( tmp_length = charset->coll->strnxfrm(charset, str, str_length, str_length, tmp_str, tmp_length, 0); /**/ - DBUG_ASSERT(tmp_length == str_length); + DBUG_ASSERT(tmp_length <= str_length); break; } @@ -7755,7 +7753,6 @@ ha_innobase::write_row( wsrep_thd_query(user_thd)); } #endif /* WITH_WSREP */ - /* ALTER TABLE is COMMITted at every 10000 copied rows. The IX table lock for the original table has to be re-issued. As this method will be called on a temporary table where the @@ -7794,7 +7791,8 @@ no_commit: { case WSREP_TRX_OK: break; - case WSREP_TRX_ROLLBACK: + case WSREP_TRX_SIZE_EXCEEDED: + case WSREP_TRX_CERT_FAIL: case WSREP_TRX_ERROR: DBUG_RETURN(1); } @@ -7818,10 +7816,12 @@ no_commit: { case WSREP_TRX_OK: break; - case WSREP_TRX_ROLLBACK: + case WSREP_TRX_SIZE_EXCEEDED: + case WSREP_TRX_CERT_FAIL: case WSREP_TRX_ERROR: DBUG_RETURN(1); } + if (binlog_hton->commit(binlog_hton, user_thd, 1)) DBUG_RETURN(1); wsrep_post_commit(user_thd, TRUE); @@ -7949,9 +7949,10 @@ no_commit: #ifdef WITH_WSREP /* workaround for LP bug #355000, retrying the insert */ case SQLCOM_INSERT: - if (wsrep_on(current_thd) && - auto_inc_inserted && - wsrep_drupal_282555_workaround && + if (wsrep_on(current_thd) && + auto_inc_inserted && + wsrep_drupal_282555_workaround && + wsrep_thd_retry_counter(current_thd) == 0 && !thd_test_options(current_thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { @@ -7963,8 +7964,7 @@ no_commit: error= DB_SUCCESS; wsrep_thd_set_conflict_state( current_thd, MUST_ABORT); - innobase_srv_conc_exit_innodb( - prebuilt->trx); + innobase_srv_conc_exit_innodb(prebuilt->trx); /* jump straight to func exit over * later wsrep hooks */ goto func_exit; @@ -10133,6 +10133,13 @@ ha_innobase::wsrep_append_keys( } else { ut_a(table->s->keys <= 256); uint i; + bool hasPK= false; + + for (i=0; i<table->s->keys && !hasPK; ++i) { + KEY* key_info = table->key_info + i; + if (key_info->flags & HA_NOSAME) hasPK = true; + } + for (i=0; i<table->s->keys; ++i) { uint len; char keyval0[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'}; @@ -10153,14 +10160,11 @@ ha_innobase::wsrep_append_keys( table->s->table_name.str, key_info->name); } - if (key_info->flags & HA_NOSAME || + if (!hasPK || key_info->flags & HA_NOSAME || ((tab && dict_table_get_referenced_constraint(tab, idx)) || (!tab && referenced_by_foreign_key()))) { - if (key_info->flags & HA_NOSAME || shared) - key_appended = true; - len = wsrep_store_key_val_for_row( table, i, key0, key_info->key_length, record0, &is_null); @@ -10169,6 +10173,10 @@ ha_innobase::wsrep_append_keys( thd, trx, table_share, table, keyval0, len+1, shared); if (rcode) DBUG_RETURN(rcode); + + if (key_info->flags & HA_NOSAME || shared) + key_appended = true; + } else { @@ -17759,6 +17767,7 @@ wsrep_abort_slave_trx(wsrep_seqno_t bf_seqno, wsrep_seqno_t victim_seqno) } /*******************************************************************//** This function is used to kill one transaction in BF. */ + int wsrep_innobase_kill_one_trx(void * const bf_thd_ptr, const trx_t * const bf_trx, diff --git a/storage/xtradb/include/ha_prototypes.h b/storage/xtradb/include/ha_prototypes.h index 7d8224820c6..d027deb6140 100644 --- a/storage/xtradb/include/ha_prototypes.h +++ b/storage/xtradb/include/ha_prototypes.h @@ -290,7 +290,7 @@ UNIV_INTERN int wsrep_innobase_kill_one_trx(void *thd_ptr, const trx_t *bf_trx, trx_t *victim_trx, ibool signal); -extern "C" int wsrep_thd_is_brute_force(void *thd_ptr); +my_bool wsrep_thd_is_BF(void *thd_ptr, my_bool sync); int wsrep_trx_order_before(void *thd1, void *thd2); void wsrep_innobase_mysql_sort(int mysql_type, uint charset_number, unsigned char* str, unsigned int str_length); diff --git a/storage/xtradb/include/lock0lock.h b/storage/xtradb/include/lock0lock.h index d11f3b006d4..466e728f65b 100644 --- a/storage/xtradb/include/lock0lock.h +++ b/storage/xtradb/include/lock0lock.h @@ -906,7 +906,6 @@ lock_trx_has_rec_x_lock( record */ #define LOCK_CONV_BY_OTHER 4096 /*!< this bit is set when the lock is created by other transaction */ -#define WSREP_BF 8192 #if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION|LOCK_CONV_BY_OTHER)&LOCK_TYPE_MASK # error #endif diff --git a/storage/xtradb/lock/lock0lock.cc b/storage/xtradb/lock/lock0lock.cc index c6cb885ca4f..f78f25df895 100644 --- a/storage/xtradb/lock/lock0lock.cc +++ b/storage/xtradb/lock/lock0lock.cc @@ -55,7 +55,6 @@ extern my_bool wsrep_debug; extern my_bool wsrep_log_conflicts; #include "ha_prototypes.h" #endif - /* Restricts the length of search we will do in the waits-for graph of transactions */ #define LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK 1000000 @@ -981,47 +980,6 @@ lock_rec_has_to_wait( && !lock_mode_compatible(static_cast<enum lock_mode>( LOCK_MODE_MASK & type_mode), lock_get_mode(lock2))) { -#ifdef WITH_WSREP - /* if BF thread is locking and has conflict with another BF - thread, we need to look at trx ordering and lock types */ - if (for_locking && - wsrep_thd_is_brute_force(trx->mysql_thd) && - wsrep_thd_is_brute_force(lock2->trx->mysql_thd)) { - - if (wsrep_debug) { - fprintf(stderr, "\n BF-BF lock conflict \n"); - lock_rec_print(stderr, lock2); - } - - if (wsrep_trx_order_before(trx->mysql_thd, - lock2->trx->mysql_thd) && - (type_mode & LOCK_MODE_MASK) == LOCK_X && - (lock2->type_mode & LOCK_MODE_MASK) == LOCK_X) - { - /* exclusive lock conflicts are not accepted */ - fprintf(stderr, "BF-BF X lock conflict\n"); - lock_rec_print(stderr, lock2); - - abort(); - } else { - if (wsrep_debug) { - fprintf(stderr, - "BF conflict, modes: %lu %lu\n", - type_mode, - lock2->type_mode); -#ifdef OUT - fprintf(stderr, - "seqnos %llu %llu\n", - (long long)wsrep_thd_trx_seqno( - trx->mysql_thd), - (long long)wsrep_thd_trx_seqno( - lock2->trx->mysql_thd)); -#endif - } - return FALSE; - } - } -#endif /* WITH_WSREP */ /* We have somewhat complex rules when gap type record locks cause waits */ @@ -1071,6 +1029,44 @@ lock_rec_has_to_wait( return(FALSE); } +#ifdef WITH_WSREP + /* if BF thread is locking and has conflict with another BF + thread, we need to look at trx ordering and lock types */ + if (for_locking && + wsrep_thd_is_BF(trx->mysql_thd, FALSE) && + wsrep_thd_is_BF(lock2->trx->mysql_thd, TRUE)) { + + if (wsrep_debug) { + fprintf(stderr, "\n BF-BF lock conflict \n"); + lock_rec_print(stderr, lock2); + } + + if (wsrep_trx_order_before(trx->mysql_thd, + lock2->trx->mysql_thd) && + (type_mode & LOCK_MODE_MASK) == LOCK_X && + (lock2->type_mode & LOCK_MODE_MASK) == LOCK_X) + { + /* exclusive lock conflicts are not accepted */ + fprintf(stderr, "BF-BF X lock conflict\n"); + lock_rec_print(stderr, lock2); + abort(); + } else { + /* if lock2->index->n_uniq <= + lock2->index->n_user_defined_cols + operation is on uniq index + */ + if (wsrep_debug) fprintf(stderr, + "BF conflict, modes: %lu %lu, " + "idx: %s-%s n_uniq %u n_user %u\n", + type_mode, lock2->type_mode, + lock2->index->name, + lock2->index->table_name, + lock2->index->n_uniq, + lock2->index->n_user_defined_cols); + return FALSE; + } + } +#endif /* WITH_WSREP */ return(TRUE); } @@ -1578,8 +1574,7 @@ lock_rec_has_expl( static void lock_rec_discard(lock_t* in_lock); -#endif /* WITH_WSREP */ - +#endif #ifdef UNIV_DEBUG /*********************************************************************//** Checks if some other transaction has a lock request in the queue. @@ -1629,17 +1624,16 @@ lock_rec_other_has_expl_req( #endif /* UNIV_DEBUG */ #ifdef WITH_WSREP -static -void +static void wsrep_kill_victim(const trx_t * const trx, const lock_t *lock) { ut_ad(lock_mutex_own()); ut_ad(trx_mutex_own(lock->trx)); - my_bool bf_this = wsrep_thd_is_brute_force(trx->mysql_thd); - my_bool bf_other = wsrep_thd_is_brute_force(lock->trx->mysql_thd); + my_bool bf_this = wsrep_thd_is_BF(trx->mysql_thd, FALSE); + my_bool bf_other = wsrep_thd_is_BF(lock->trx->mysql_thd, TRUE); if ((bf_this && !bf_other) || (bf_this && bf_other && wsrep_trx_order_before( trx->mysql_thd, lock->trx->mysql_thd))) { - + if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) { if (wsrep_debug) fprintf(stderr, "WSREP: BF victim waiting\n"); @@ -1933,11 +1927,6 @@ lock_rec_create( lock->trx = trx; lock->type_mode = (type_mode & ~LOCK_TYPE_MASK) | LOCK_REC; -#ifdef WITH_WSREP - if (wsrep_thd_is_brute_force(trx->mysql_thd)) { - lock->type_mode |= WSREP_BF; - } -#endif /* WITH_WSREP */ lock->index = index; lock->un_member.rec_lock.space = space; @@ -1957,12 +1946,12 @@ lock_rec_create( ut_ad(index->table->n_ref_count > 0 || !index->table->can_be_evicted); #ifdef WITH_WSREP - if (c_lock && wsrep_thd_is_brute_force(trx->mysql_thd)) { + if (c_lock && wsrep_thd_is_BF(trx->mysql_thd, FALSE)) { lock_t *hash = (lock_t *)c_lock->hash; lock_t *prev = NULL; while (hash && - wsrep_thd_is_brute_force(((lock_t *)hash)->trx->mysql_thd) && + wsrep_thd_is_BF(((lock_t *)hash)->trx->mysql_thd, TRUE) && wsrep_trx_order_before( ((lock_t *)hash)->trx->mysql_thd, trx->mysql_thd)) { @@ -2376,11 +2365,6 @@ lock_rec_lock_fast( || (LOCK_MODE_MASK & mode) == LOCK_X); ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP || mode - (LOCK_MODE_MASK & mode) == 0 -#ifdef WITH_WSREP - || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == 0 - || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == LOCK_GAP - || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == LOCK_REC_NOT_GAP -#endif /* WITH_WSREP */ || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP); ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index)); @@ -2468,11 +2452,6 @@ lock_rec_lock_slow( || (LOCK_MODE_MASK & mode) == LOCK_X); ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP || mode - (LOCK_MODE_MASK & mode) == 0 -#ifdef WITH_WSREP - || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == 0 - || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == LOCK_GAP - || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == LOCK_REC_NOT_GAP -#endif /* WITH_WSREP */ || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP); ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index)); @@ -2582,19 +2561,8 @@ lock_rec_lock( || (LOCK_MODE_MASK & mode) == LOCK_X); ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP -#ifdef WITH_WSREP - || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == 0 - || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == LOCK_GAP - || mode - (LOCK_MODE_MASK & mode) - WSREP_BF == LOCK_REC_NOT_GAP -#endif /* WITH_WSREP */ || mode - (LOCK_MODE_MASK & mode) == 0); -#ifdef WITH_WSREP - if (wsrep_thd_is_brute_force(thr_get_trx(thr)->mysql_thd)) { - mode |= WSREP_BF; - } -#endif - ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index)); /* We try a simplified and faster subroutine for the most @@ -4040,17 +4008,19 @@ lock_deadlock_select_victim( if (trx_weight_ge(ctx->wait_lock->trx, ctx->start)) { /* The joining transaction is 'smaller', choose it as the victim and roll it back. */ + #ifdef WITH_WSREP - if (!wsrep_thd_is_brute_force(ctx->start->mysql_thd)) - return(ctx->start); - else + if (wsrep_thd_is_BF(ctx->start->mysql_thd, TRUE)) + return(ctx->wait_lock->trx); + else #endif /* WITH_WSREP */ return(ctx->start); } + #ifdef WITH_WSREP - if (wsrep_thd_is_brute_force(ctx->wait_lock->trx->mysql_thd)) + if (wsrep_thd_is_BF(ctx->wait_lock->trx->mysql_thd, TRUE)) return(ctx->start); - else + else #endif /* WITH_WSREP */ return(ctx->wait_lock->trx); } @@ -4182,7 +4152,7 @@ lock_deadlock_search( ctx->too_deep = TRUE; #ifdef WITH_WSREP - if (wsrep_thd_is_brute_force(ctx->start->mysql_thd)) + if (wsrep_thd_is_BF(ctx->start->mysql_thd, TRUE)) return(ctx->wait_lock->trx->id); else #endif /* WITH_WSREP */ @@ -4205,7 +4175,7 @@ lock_deadlock_search( ctx->too_deep = TRUE; #ifdef WITH_WSREP - if (wsrep_thd_is_brute_force(ctx->start->mysql_thd)) + if (wsrep_thd_is_BF(ctx->start->mysql_thd, TRUE)) return(lock->trx->id); else #endif /* WITH_WSREP */ @@ -4328,7 +4298,7 @@ lock_deadlock_check_and_resolve( ut_a(victim_trx_id == trx->id); #ifdef WITH_WSREP - if (!wsrep_thd_is_brute_force(ctx.start->mysql_thd)) + if (!wsrep_thd_is_BF(ctx.start->mysql_thd, TRUE)) { #endif /* WITH_WSREP */ if (!srv_read_only_mode) { @@ -4426,7 +4396,7 @@ lock_table_create( UT_LIST_ADD_LAST(trx_locks, trx->lock.trx_locks, lock); #ifdef WITH_WSREP - if (c_lock && wsrep_thd_is_brute_force(trx->mysql_thd)) { + if (c_lock && wsrep_thd_is_BF(trx->mysql_thd, FALSE)) { UT_LIST_INSERT_AFTER( un_member.tab_lock.locks, table->locks, c_lock, lock); } else { @@ -6531,7 +6501,7 @@ lock_rec_convert_impl_to_expl( if (rec_get_deleted_flag(rec, rec_offs_comp(offsets)) #ifdef WITH_WSREP - && !wsrep_thd_is_brute_force(impl_trx->mysql_thd) + && !wsrep_thd_is_BF(impl_trx->mysql_thd, FALSE) /* BF-BF conflict is possible if advancing into lock_rec_other_has_conflicting*/ #endif /* WITH_WSREP */ diff --git a/storage/xtradb/lock/lock0wait.cc b/storage/xtradb/lock/lock0wait.cc index a1c35e20ead..388c847f580 100644 --- a/storage/xtradb/lock/lock0wait.cc +++ b/storage/xtradb/lock/lock0wait.cc @@ -184,6 +184,28 @@ lock_wait_table_reserve_slot( return(NULL); } +#ifdef WITH_WSREP +/*********************************************************************//** +check if lock timeout was for priority thread, +as a side effect trigger lock monitor +@return false for regular lock timeout */ +static ibool +wsrep_is_BF_lock_timeout( +/*====================*/ + trx_t* trx) /* in: trx to check for lock priority */ +{ + if (wsrep_on(trx->mysql_thd) && + wsrep_thd_is_BF(trx->mysql_thd, FALSE)) { + fprintf(stderr, "WSREP: BF lock wait long\n"); + srv_print_innodb_monitor = TRUE; + srv_print_innodb_lock_monitor = TRUE; + os_event_set(srv_monitor_event); + return TRUE; + } + return FALSE; + } +#endif /* WITH_WSREP */ + /***************************************************************//** Puts a user OS thread to wait for a lock to be released. If an error occurs during the wait trx->error_state associated with thr is @@ -375,9 +397,15 @@ lock_wait_suspend_thread( if (lock_wait_timeout < 100000000 && wait_time > (double) lock_wait_timeout) { +#ifdef WITH_WSREP + if (!wsrep_is_BF_lock_timeout(trx)) { +#endif /* WITH_WSREP */ trx->error_state = DB_LOCK_WAIT_TIMEOUT; +#ifdef WITH_WSREP + } +#endif /* WITH_WSREP */ MONITOR_INC(MONITOR_TIMEOUT); } @@ -461,8 +489,13 @@ lock_wait_check_and_cancel( if (trx->lock.wait_lock) { ut_a(trx->lock.que_state == TRX_QUE_LOCK_WAIT); - +#ifdef WITH_WSREP + if (!wsrep_is_BF_lock_timeout(trx)) { +#endif /* WITH_WSREP */ lock_cancel_waiting_and_release(trx->lock.wait_lock); +#ifdef WITH_WSREP + } +#endif /* WITH_WSREP */ } lock_mutex_exit(); diff --git a/storage/xtradb/row/row0ins.cc b/storage/xtradb/row/row0ins.cc index c29c1f56d3f..ff92162c452 100644 --- a/storage/xtradb/row/row0ins.cc +++ b/storage/xtradb/row/row0ins.cc @@ -1938,9 +1938,6 @@ row_ins_scan_sec_index_for_duplicate( mem_heap_t* offsets_heap) /*!< in/out: memory heap that can be emptied */ { -#ifdef WITH_WSREP - trx_t* trx = thr_get_trx(thr); -#endif ulint n_unique; int cmp; ulint n_fields_cmp; @@ -2009,16 +2006,7 @@ row_ins_scan_sec_index_for_duplicate( if (flags & BTR_NO_LOCKING_FLAG) { /* Set no locks when applying log in online table rebuild. */ - -#ifdef WITH_WSREP - /* slave applier must not get duplicate error */ - } else if (allow_duplicates || - (wsrep_on(trx->mysql_thd) && - wsrep_thd_is_brute_force(trx->mysql_thd))) { -#else - } else if (allow_duplicates) { -#endif /* If the SQL-query will update or replace duplicate key we will take X-lock for @@ -2029,6 +2017,10 @@ row_ins_scan_sec_index_for_duplicate( lock_type, block, rec, index, offsets, thr); } else { +#ifdef WITH_WSREP + /* appliers don't need dupkey checks */ + if (!wsrep_thd_is_BF(thr_get_trx(thr)->mysql_thd, 0)) +#endif /* WITH_WSREP */ err = row_ins_set_shared_rec_lock( lock_type, block, rec, index, offsets, thr); } @@ -2224,13 +2216,7 @@ row_ins_duplicate_error_in_clust( sure that in roll-forward we get the same duplicate errors as in original execution */ -#ifdef WITH_WSREP - if (trx->duplicates || - (wsrep_on(trx->mysql_thd) && - wsrep_thd_is_brute_force(trx->mysql_thd))) { -#else if (trx->duplicates) { -#endif /* If the SQL-query will update or replace duplicate key we will take X-lock for @@ -2275,13 +2261,7 @@ duplicate: offsets = rec_get_offsets(rec, cursor->index, offsets, ULINT_UNDEFINED, &heap); -#ifdef WITH_WSREP - if (trx->duplicates || - (wsrep_on(trx->mysql_thd) && - wsrep_thd_is_brute_force(trx->mysql_thd))) { -#else if (trx->duplicates) { -#endif /* If the SQL-query will update or replace duplicate key we will take X-lock for diff --git a/storage/xtradb/row/row0upd.cc b/storage/xtradb/row/row0upd.cc index 9b733410c00..0f299228493 100644 --- a/storage/xtradb/row/row0upd.cc +++ b/storage/xtradb/row/row0upd.cc @@ -375,6 +375,8 @@ wsrep_row_upd_check_foreign_constraints( trx = thr_get_trx(thr); + /* TODO: make native slave thread bail out here */ + rec = btr_pcur_get_rec(pcur); ut_ad(rec_offs_validate(rec, index, offsets)); diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc index 46d81790fe8..6506daa3347 100644 --- a/storage/xtradb/srv/srv0srv.cc +++ b/storage/xtradb/srv/srv0srv.cc @@ -2998,27 +2998,6 @@ suspend_thread: OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */ } -#ifdef WITH_WSREP_TODO -/*********************************************************************//** -check if lock timeout was for priority thread, -as a side effect trigger lock monitor -@return false for regular lock timeout */ -static ibool -wsrep_is_BF_lock_timeout( -/*====================*/ - srv_slot_t* slot) /* in: lock slot to check for lock priority */ -{ - if (wsrep_on(thr_get_trx(slot->thr)->mysql_thd) && - wsrep_thd_is_brute_force((thr_get_trx(slot->thr))->mysql_thd)) { - fprintf(stderr, "WSREP: BF lock wait long\n"); - srv_print_innodb_monitor = TRUE; - srv_print_innodb_lock_monitor = TRUE; - os_event_set(lock_sys->timeout_event); - return TRUE; - } - return FALSE; - } -#endif /* WITH_WSREP_TODO */ /*********************************************************************//** Check if purge should stop. @return true if it should shutdown. */ diff --git a/storage/xtradb/trx/trx0sys.cc b/storage/xtradb/trx/trx0sys.cc index ed3d177820f..19d14bf6f38 100644 --- a/storage/xtradb/trx/trx0sys.cc +++ b/storage/xtradb/trx/trx0sys.cc @@ -178,7 +178,12 @@ trx_sys_flush_max_trx_id(void) mtr_t mtr; trx_sysf_t* sys_header; +#ifndef WITH_WSREP + /* wsrep_fake_trx_id violates this assert + * Copied from trx_sys_get_new_trx_id + */ ut_ad(mutex_own(&trx_sys->mutex)); +#endif /* WITH_WSREP */ if (!srv_read_only_mode) { mtr_start(&mtr); diff --git a/support-files/mysql.spec.sh b/support-files/mysql.spec.sh index 254c1ea197c..eee80bf8b72 100644 --- a/support-files/mysql.spec.sh +++ b/support-files/mysql.spec.sh @@ -1174,6 +1174,7 @@ echo "=====" >> $STATUS_HISTORY %attr(755, root, root) %{_bindir}/wsrep_sst_rsync %attr(755, root, root) %{_bindir}/wsrep_sst_rsync_wan %attr(755, root, root) %{_bindir}/wsrep_sst_xtrabackup +%attr(755, root, root) %{_bindir}/wsrep_sst_xtrabackup-v2 %endif %attr(755, root, root) %{_sbindir}/mysqld diff --git a/wsrep/wsrep_api.h b/wsrep/wsrep_api.h index f713de66d57..c3304d7ed7c 100644 --- a/wsrep/wsrep_api.h +++ b/wsrep/wsrep_api.h @@ -137,7 +137,11 @@ typedef void (*wsrep_log_cb_t)(wsrep_log_level_t, const char *); typedef uint64_t wsrep_trx_id_t; //!< application transaction ID typedef uint64_t wsrep_conn_id_t; //!< application connection ID typedef int64_t wsrep_seqno_t; //!< sequence number of a writeset, etc. +#ifdef __cplusplus +typedef bool wsrep_bool_t; +#else typedef _Bool wsrep_bool_t; //!< should be the same as standard (C99) bool +#endif /* __cplusplus */ /*! undefined seqno */ #define WSREP_SEQNO_UNDEFINED (-1) diff --git a/wsrep/wsrep_dummy.c b/wsrep/wsrep_dummy.c index 3c7a7c2e354..7b86e2e386a 100644 --- a/wsrep/wsrep_dummy.c +++ b/wsrep/wsrep_dummy.c @@ -20,11 +20,13 @@ #include <errno.h> #include <stdbool.h> +#include <string.h> /*! Dummy backend context. */ typedef struct wsrep_dummy { wsrep_log_cb_t log_fn; + char* options; } wsrep_dummy_t; /* Get pointer to wsrep_dummy context from wsrep_t pointer */ @@ -42,6 +44,10 @@ typedef struct wsrep_dummy static void dummy_free(wsrep_t *w) { WSREP_DBUG_ENTER(w); + if (WSREP_DUMMY(w)->options) + { + free(WSREP_DUMMY(w)->options); + } free(w->ctx); w->ctx = NULL; } @@ -49,8 +55,15 @@ static void dummy_free(wsrep_t *w) static wsrep_status_t dummy_init (wsrep_t* w, const struct wsrep_init_args* args) { - WSREP_DUMMY(w)->log_fn = args->logger_cb; WSREP_DBUG_ENTER(w); + WSREP_DUMMY(w)->log_fn = args->logger_cb; + if (args->options) + { + char* options = malloc(strlen(args->options) + 1); + if (options == NULL) return WSREP_WARNING; + strcpy(options, args->options); + WSREP_DUMMY(w)->options = options; + } return WSREP_OK; } @@ -61,16 +74,41 @@ static uint64_t dummy_capabilities (wsrep_t* w __attribute__((unused))) static wsrep_status_t dummy_options_set( wsrep_t* w, - const char* conf __attribute__((unused))) -{ - WSREP_DBUG_ENTER(w); + const char* conf) +{ + char* options = NULL; + WSREP_DBUG_ENTER(w); + // concatenate config string with ';' + options = WSREP_DUMMY(w)->options; + if (options == NULL) + { + options = malloc(strlen(conf) + 1); + if (options == NULL) + { + return WSREP_WARNING; + } + strcpy(options, conf); + } else { + int opt_len = strlen(options); + char* p = realloc(options, + opt_len + 1 + // ';' + strlen(conf) + 1); // \0 + if (p == NULL) + { + return WSREP_WARNING; + } + p[opt_len] = ';'; + strcpy(p + opt_len + 1, conf); + options = p; + } + WSREP_DUMMY(w)->options = options; return WSREP_OK; } static char* dummy_options_get (wsrep_t* w) { WSREP_DBUG_ENTER(w); - return NULL; + return WSREP_DUMMY(w)->options; } static wsrep_status_t dummy_connect( @@ -385,6 +423,7 @@ int wsrep_dummy_loader(wsrep_t* w) // initialize private context WSREP_DUMMY(w)->log_fn = NULL; + WSREP_DUMMY(w)->options = NULL; return 0; } |